From 243abd5b7803d540280f029bc5224a4a2892579a Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 10 Jul 2017 15:47:32 -0700 Subject: mm: hugetlb: prevent reuse of hwpoisoned free hugepages Patch series "mm: hwpoison: fixlet for hugetlb migration". This patchset updates the hwpoison/hugetlb code to address 2 reported issues. One is madvise(MADV_HWPOISON) failure reported by Intel's lkp robot (see http://lkml.kernel.org/r/20170417055948.GM31394@yexl-desktop.) First half was already fixed in mainline, and another half about hugetlb cases are solved in this series. Another issue is "narrow-down error affected region into a single 4kB page instead of a whole hugetlb page" issue, which was tried by Anshuman (http://lkml.kernel.org/r/20170420110627.12307-1-khandual@linux.vnet.ibm.com) and I updated it to apply it more widely. This patch (of 9): We no longer use MIGRATE_ISOLATE to prevent reuse of hwpoison hugepages as we did before. So current dequeue_huge_page_node() doesn't work as intended because it still uses is_migrate_isolate_page() for this check. This patch fixes it with PageHWPoison flag. Link: http://lkml.kernel.org/r/1496305019-5493-2-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Cc: Michal Hocko Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 1 - 1 file changed, 1 deletion(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index dbe3e50c9aa5..e3bf6432ed25 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From b37ff71cc626a0c1b5e098ff9a0b723815f6aaeb Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 10 Jul 2017 15:47:38 -0700 Subject: mm: hwpoison: change PageHWPoison behavior on hugetlb pages We'd like to narrow down the error region in memory error on hugetlb pages. However, currently we set PageHWPoison flags on all subpages in the error hugepage and add # of subpages to num_hwpoison_pages, which doesn't fit our purpose. So this patch changes the behavior and we only set PageHWPoison on the head page then increase num_hwpoison_pages only by 1. This is a preparation for narrow-down part which comes in later patches. Link: http://lkml.kernel.org/r/1496305019-5493-4-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Cc: Michal Hocko Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 87 +++++++++++++++-------------------------------------- 1 file changed, 24 insertions(+), 63 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index e3bf6432ed25..a9ddb0e72f5b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1009,22 +1009,6 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, return unmap_success; } -static void set_page_hwpoison_huge_page(struct page *hpage) -{ - int i; - int nr_pages = 1 << compound_order(hpage); - for (i = 0; i < nr_pages; i++) - SetPageHWPoison(hpage + i); -} - -static void clear_page_hwpoison_huge_page(struct page *hpage) -{ - int i; - int nr_pages = 1 << compound_order(hpage); - for (i = 0; i < nr_pages; i++) - ClearPageHWPoison(hpage + i); -} - /** * memory_failure - Handle memory failure of a page. * @pfn: Page Number of the corrupted page @@ -1050,7 +1034,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags) struct page *hpage; struct page *orig_head; int res; - unsigned int nr_pages; unsigned long page_flags; if (!sysctl_memory_failure_recovery) @@ -1064,24 +1047,23 @@ int memory_failure(unsigned long pfn, int trapno, int flags) p = pfn_to_page(pfn); orig_head = hpage = compound_head(p); + + /* tmporary check code, to be updated in later patches */ + if (PageHuge(p)) { + if (TestSetPageHWPoison(hpage)) { + pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); + return 0; + } + goto tmp; + } if (TestSetPageHWPoison(p)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); return 0; } - /* - * Currently errors on hugetlbfs pages are measured in hugepage units, - * so nr_pages should be 1 << compound_order. OTOH when errors are on - * transparent hugepages, they are supposed to be split and error - * measurement is done in normal page units. So nr_pages should be one - * in this case. - */ - if (PageHuge(p)) - nr_pages = 1 << compound_order(hpage); - else /* normal page or thp */ - nr_pages = 1; - num_poisoned_pages_add(nr_pages); +tmp: + num_poisoned_pages_inc(); /* * We need/can do nothing about count=0 pages. @@ -1109,12 +1091,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags) if (PageHWPoison(hpage)) { if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) || (p != hpage && TestSetPageHWPoison(hpage))) { - num_poisoned_pages_sub(nr_pages); + num_poisoned_pages_dec(); unlock_page(hpage); return 0; } } - set_page_hwpoison_huge_page(hpage); res = dequeue_hwpoisoned_huge_page(hpage); action_result(pfn, MF_MSG_FREE_HUGE, res ? MF_IGNORED : MF_DELAYED); @@ -1137,7 +1118,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) pr_err("Memory failure: %#lx: thp split failed\n", pfn); if (TestClearPageHWPoison(p)) - num_poisoned_pages_sub(nr_pages); + num_poisoned_pages_dec(); put_hwpoison_page(p); return -EBUSY; } @@ -1193,14 +1174,14 @@ int memory_failure(unsigned long pfn, int trapno, int flags) */ if (!PageHWPoison(p)) { pr_err("Memory failure: %#lx: just unpoisoned\n", pfn); - num_poisoned_pages_sub(nr_pages); + num_poisoned_pages_dec(); unlock_page(hpage); put_hwpoison_page(hpage); return 0; } if (hwpoison_filter(p)) { if (TestClearPageHWPoison(p)) - num_poisoned_pages_sub(nr_pages); + num_poisoned_pages_dec(); unlock_page(hpage); put_hwpoison_page(hpage); return 0; @@ -1219,14 +1200,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags) put_hwpoison_page(hpage); return 0; } - /* - * Set PG_hwpoison on all pages in an error hugepage, - * because containment is done in hugepage unit for now. - * Since we have done TestSetPageHWPoison() for the head page with - * page lock held, we can safely set PG_hwpoison bits on tail pages. - */ - if (PageHuge(p)) - set_page_hwpoison_huge_page(hpage); /* * It's very difficult to mess with pages currently under IO @@ -1397,7 +1370,6 @@ int unpoison_memory(unsigned long pfn) struct page *page; struct page *p; int freeit = 0; - unsigned int nr_pages; static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -1442,8 +1414,6 @@ int unpoison_memory(unsigned long pfn) return 0; } - nr_pages = 1 << compound_order(page); - if (!get_hwpoison_page(p)) { /* * Since HWPoisoned hugepage should have non-zero refcount, @@ -1473,10 +1443,8 @@ int unpoison_memory(unsigned long pfn) if (TestClearPageHWPoison(page)) { unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", pfn, &unpoison_rs); - num_poisoned_pages_sub(nr_pages); + num_poisoned_pages_dec(); freeit = 1; - if (PageHuge(page)) - clear_page_hwpoison_huge_page(page); } unlock_page(page); @@ -1608,14 +1576,10 @@ static int soft_offline_huge_page(struct page *page, int flags) ret = -EIO; } else { /* overcommit hugetlb page will be freed to buddy */ - if (PageHuge(page)) { - set_page_hwpoison_huge_page(hpage); + SetPageHWPoison(page); + if (PageHuge(page)) dequeue_hwpoisoned_huge_page(hpage); - num_poisoned_pages_add(1 << compound_order(hpage)); - } else { - SetPageHWPoison(page); - num_poisoned_pages_inc(); - } + num_poisoned_pages_inc(); } return ret; } @@ -1731,15 +1695,12 @@ static int soft_offline_in_use_page(struct page *page, int flags) static void soft_offline_free_page(struct page *page) { - if (PageHuge(page)) { - struct page *hpage = compound_head(page); + struct page *head = compound_head(page); - set_page_hwpoison_huge_page(hpage); - if (!dequeue_hwpoisoned_huge_page(hpage)) - num_poisoned_pages_add(1 << compound_order(hpage)); - } else { - if (!TestSetPageHWPoison(page)) - num_poisoned_pages_inc(); + if (!TestSetPageHWPoison(head)) { + num_poisoned_pages_inc(); + if (PageHuge(head)) + dequeue_hwpoisoned_huge_page(head); } } -- cgit v1.2.3 From c3114a84f7f96c9d5c73c8bfa7e21ff42fda97e2 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 10 Jul 2017 15:47:41 -0700 Subject: mm: hugetlb: soft-offline: dissolve source hugepage after successful migration Currently hugepage migrated by soft-offline (i.e. due to correctable memory errors) is contained as a hugepage, which means many non-error pages in it are unreusable, i.e. wasted. This patch solves this issue by dissolving source hugepages into buddy. As done in previous patch, PageHWPoison is set only on a head page of the error hugepage. Then in dissoliving we move the PageHWPoison flag to the raw error page so that all healthy subpages return back to buddy. [arnd@arndb.de: fix warnings: replace some macros with inline functions] Link: http://lkml.kernel.org/r/20170609102544.2947326-1-arnd@arndb.de Link: http://lkml.kernel.org/r/1496305019-5493-5-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Anshuman Khandual Signed-off-by: Naoya Horiguchi Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index a9ddb0e72f5b..42c5803e6275 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1575,11 +1575,8 @@ static int soft_offline_huge_page(struct page *page, int flags) if (ret > 0) ret = -EIO; } else { - /* overcommit hugetlb page will be freed to buddy */ - SetPageHWPoison(page); if (PageHuge(page)) - dequeue_hwpoisoned_huge_page(hpage); - num_poisoned_pages_inc(); + dissolve_free_huge_page(page); } return ret; } -- cgit v1.2.3 From d4a3a60b37bf4609f9b17961a0db2f6e7ec746cd Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 10 Jul 2017 15:47:44 -0700 Subject: mm: soft-offline: dissolve free hugepage if soft-offlined Now we have code to rescue most of healthy pages from a hwpoisoned hugepage. So let's apply it to soft_offline_free_page too. Link: http://lkml.kernel.org/r/1496305019-5493-6-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 42c5803e6275..8a7b39486b9d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1697,7 +1697,7 @@ static void soft_offline_free_page(struct page *page) if (!TestSetPageHWPoison(head)) { num_poisoned_pages_inc(); if (PageHuge(head)) - dequeue_hwpoisoned_huge_page(head); + dissolve_free_huge_page(page); } } -- cgit v1.2.3 From 761ad8d7c7b5485bb66fd5bccb58a891fe784544 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 10 Jul 2017 15:47:47 -0700 Subject: mm: hwpoison: introduce memory_failure_hugetlb() memory_failure() is a big function and hard to maintain. Handling hugetlb- and non-hugetlb- case in a single function is not good, so this patch separates PageHuge() branch into a new function, which saves many PageHuge() check. Link: http://lkml.kernel.org/r/1496305019-5493-7-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 134 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 82 insertions(+), 52 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8a7b39486b9d..5db3827f0d36 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1009,6 +1009,76 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, return unmap_success; } +static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags) +{ + struct page_state *ps; + struct page *p = pfn_to_page(pfn); + struct page *head = compound_head(p); + int res; + unsigned long page_flags; + + if (TestSetPageHWPoison(head)) { + pr_err("Memory failure: %#lx: already hardware poisoned\n", + pfn); + return 0; + } + + num_poisoned_pages_inc(); + + if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) { + /* + * Check "filter hit" and "race with other subpage." + */ + lock_page(head); + if (PageHWPoison(head)) { + if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) + || (p != head && TestSetPageHWPoison(head))) { + num_poisoned_pages_dec(); + unlock_page(head); + return 0; + } + } + unlock_page(head); + dissolve_free_huge_page(p); + action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED); + return 0; + } + + lock_page(head); + page_flags = head->flags; + + if (!PageHWPoison(head)) { + pr_err("Memory failure: %#lx: just unpoisoned\n", pfn); + num_poisoned_pages_dec(); + unlock_page(head); + put_hwpoison_page(head); + return 0; + } + + if (!hwpoison_user_mappings(p, pfn, trapno, flags, &head)) { + action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); + res = -EBUSY; + goto out; + } + + res = -EBUSY; + + for (ps = error_states;; ps++) + if ((p->flags & ps->mask) == ps->res) + break; + + page_flags |= (p->flags & (1UL << PG_dirty)); + + if (!ps->mask) + for (ps = error_states;; ps++) + if ((page_flags & ps->mask) == ps->res) + break; + res = page_action(ps, p, pfn); +out: + unlock_page(head); + return res; +} + /** * memory_failure - Handle memory failure of a page. * @pfn: Page Number of the corrupted page @@ -1046,33 +1116,22 @@ int memory_failure(unsigned long pfn, int trapno, int flags) } p = pfn_to_page(pfn); - orig_head = hpage = compound_head(p); - - /* tmporary check code, to be updated in later patches */ - if (PageHuge(p)) { - if (TestSetPageHWPoison(hpage)) { - pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); - return 0; - } - goto tmp; - } + if (PageHuge(p)) + return memory_failure_hugetlb(pfn, trapno, flags); if (TestSetPageHWPoison(p)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); return 0; } -tmp: + orig_head = hpage = compound_head(p); num_poisoned_pages_inc(); /* * We need/can do nothing about count=0 pages. * 1) it's a free page, and therefore in safe hand: * prep_new_page() will be the gate keeper. - * 2) it's a free hugepage, which is also safe: - * an affected hugepage will be dequeued from hugepage freelist, - * so there's no concern about reusing it ever after. - * 3) it's part of a non-compound high order page. + * 2) it's part of a non-compound high order page. * Implies some kernel user: cannot stop them from * R/W the page; let's pray that the page has been * used and will be freed some time later. @@ -1083,31 +1142,13 @@ tmp: if (is_free_buddy_page(p)) { action_result(pfn, MF_MSG_BUDDY, MF_DELAYED); return 0; - } else if (PageHuge(hpage)) { - /* - * Check "filter hit" and "race with other subpage." - */ - lock_page(hpage); - if (PageHWPoison(hpage)) { - if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) - || (p != hpage && TestSetPageHWPoison(hpage))) { - num_poisoned_pages_dec(); - unlock_page(hpage); - return 0; - } - } - res = dequeue_hwpoisoned_huge_page(hpage); - action_result(pfn, MF_MSG_FREE_HUGE, - res ? MF_IGNORED : MF_DELAYED); - unlock_page(hpage); - return res; } else { action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); return -EBUSY; } } - if (!PageHuge(p) && PageTransHuge(hpage)) { + if (PageTransHuge(hpage)) { lock_page(p); if (!PageAnon(p) || unlikely(split_huge_page(p))) { unlock_page(p); @@ -1145,7 +1186,7 @@ tmp: return 0; } - lock_page(hpage); + lock_page(p); /* * The page could have changed compound pages during the locking. @@ -1175,32 +1216,21 @@ tmp: if (!PageHWPoison(p)) { pr_err("Memory failure: %#lx: just unpoisoned\n", pfn); num_poisoned_pages_dec(); - unlock_page(hpage); - put_hwpoison_page(hpage); + unlock_page(p); + put_hwpoison_page(p); return 0; } if (hwpoison_filter(p)) { if (TestClearPageHWPoison(p)) num_poisoned_pages_dec(); - unlock_page(hpage); - put_hwpoison_page(hpage); + unlock_page(p); + put_hwpoison_page(p); return 0; } - if (!PageHuge(p) && !PageTransTail(p) && !PageLRU(p)) + if (!PageTransTail(p) && !PageLRU(p)) goto identify_page_state; - /* - * For error on the tail page, we should set PG_hwpoison - * on the head page to show that the hugepage is hwpoisoned - */ - if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) { - action_result(pfn, MF_MSG_POISONED_HUGE, MF_IGNORED); - unlock_page(hpage); - put_hwpoison_page(hpage); - return 0; - } - /* * It's very difficult to mess with pages currently under IO * and in many cases impossible, so we just avoid it here. @@ -1248,7 +1278,7 @@ identify_page_state: break; res = page_action(ps, p, pfn); out: - unlock_page(hpage); + unlock_page(p); return res; } EXPORT_SYMBOL_GPL(memory_failure); -- cgit v1.2.3 From 78bb920344b8a6f04b79a7c254041723b931c94f Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 10 Jul 2017 15:47:50 -0700 Subject: mm: hwpoison: dissolve in-use hugepage in unrecoverable memory error Currently me_huge_page() relies on dequeue_hwpoisoned_huge_page() to keep the error hugepage away from the system, which is OK but not good enough because the hugepage still has a refcount and unpoison doesn't work on the error hugepage (PageHWPoison flags are cleared but pages are still leaked.) And there's "wasting health subpages" issue too. This patch reworks on me_huge_page() to solve these issues. For hugetlb file, recently we have truncating code so let's use it in hugetlbfs specific ->error_remove_page(). For anonymous hugepage, it's helpful to dissolve the error page after freeing it into free hugepage list. Migration entry and PageHWPoison in the head page prevent the access to it. TODO: dissolve_free_huge_page() can fail but we don't considered it yet. It's not critical (and at least no worse that now) because in such case the error hugepage just stays in free hugepage list without being dissolved. By virtue of PageHWPoison in head page, it's never allocated to processes. [akpm@linux-foundation.org: fix unused var warnings] Fixes: 23a003bfd23ea9ea0b7756b920e51f64b284b468 ("mm/madvise: pass return code of memory_failure() to userspace") Link: http://lkml.kernel.org/r/20170417055948.GM31394@yexl-desktop Link: http://lkml.kernel.org/r/1496305019-5493-8-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Reported-by: kernel test robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 93 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 40 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5db3827f0d36..6f8f69f4a986 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -554,6 +554,39 @@ static int delete_from_lru_cache(struct page *p) return -EIO; } +static int truncate_error_page(struct page *p, unsigned long pfn, + struct address_space *mapping) +{ + int ret = MF_FAILED; + + if (mapping->a_ops->error_remove_page) { + int err = mapping->a_ops->error_remove_page(mapping, p); + + if (err != 0) { + pr_info("Memory failure: %#lx: Failed to punch page: %d\n", + pfn, err); + } else if (page_has_private(p) && + !try_to_release_page(p, GFP_NOIO)) { + pr_info("Memory failure: %#lx: failed to release buffers\n", + pfn); + } else { + ret = MF_RECOVERED; + } + } else { + /* + * If the file system doesn't support it just invalidate + * This fails on dirty or anything with private pages + */ + if (invalidate_inode_page(p)) + ret = MF_RECOVERED; + else + pr_info("Memory failure: %#lx: Failed to invalidate\n", + pfn); + } + + return ret; +} + /* * Error hit kernel page. * Do nothing, try to be lucky and not touch this instead. For a few cases we @@ -578,8 +611,6 @@ static int me_unknown(struct page *p, unsigned long pfn) */ static int me_pagecache_clean(struct page *p, unsigned long pfn) { - int err; - int ret = MF_FAILED; struct address_space *mapping; delete_from_lru_cache(p); @@ -611,30 +642,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) * * Open: to take i_mutex or not for this? Right now we don't. */ - if (mapping->a_ops->error_remove_page) { - err = mapping->a_ops->error_remove_page(mapping, p); - if (err != 0) { - pr_info("Memory failure: %#lx: Failed to punch page: %d\n", - pfn, err); - } else if (page_has_private(p) && - !try_to_release_page(p, GFP_NOIO)) { - pr_info("Memory failure: %#lx: failed to release buffers\n", - pfn); - } else { - ret = MF_RECOVERED; - } - } else { - /* - * If the file system doesn't support it just invalidate - * This fails on dirty or anything with private pages - */ - if (invalidate_inode_page(p)) - ret = MF_RECOVERED; - else - pr_info("Memory failure: %#lx: Failed to invalidate\n", - pfn); - } - return ret; + return truncate_error_page(p, pfn, mapping); } /* @@ -740,24 +748,29 @@ static int me_huge_page(struct page *p, unsigned long pfn) { int res = 0; struct page *hpage = compound_head(p); + struct address_space *mapping; if (!PageHuge(hpage)) return MF_DELAYED; - /* - * We can safely recover from error on free or reserved (i.e. - * not in-use) hugepage by dequeuing it from freelist. - * To check whether a hugepage is in-use or not, we can't use - * page->lru because it can be used in other hugepage operations, - * such as __unmap_hugepage_range() and gather_surplus_pages(). - * So instead we use page_mapping() and PageAnon(). - */ - if (!(page_mapping(hpage) || PageAnon(hpage))) { - res = dequeue_hwpoisoned_huge_page(hpage); - if (!res) - return MF_RECOVERED; + mapping = page_mapping(hpage); + if (mapping) { + res = truncate_error_page(hpage, pfn, mapping); + } else { + unlock_page(hpage); + /* + * migration entry prevents later access on error anonymous + * hugepage, so we can free and dissolve it into buddy to + * save healthy subpages. + */ + if (PageAnon(hpage)) + put_page(hpage); + dissolve_free_huge_page(p); + res = MF_RECOVERED; + lock_page(hpage); } - return MF_DELAYED; + + return res; } /* @@ -856,7 +869,7 @@ static int page_action(struct page_state *ps, struct page *p, count = page_count(p) - 1; if (ps->action == me_swapcache_dirty && result == MF_DELAYED) count--; - if (count != 0) { + if (count > 0) { pr_err("Memory failure: %#lx: %s still referenced by %d users\n", pfn, action_page_types[ps->type], count); result = MF_FAILED; -- cgit v1.2.3 From ddd40d8a2c4ef8f2152ea6d227e11475cf7e5bfa Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 10 Jul 2017 15:47:53 -0700 Subject: mm: hugetlb: delete dequeue_hwpoisoned_huge_page() dequeue_hwpoisoned_huge_page() is no longer used, so let's remove it. Link: http://lkml.kernel.org/r/1496305019-5493-9-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 6f8f69f4a986..2aec57c07652 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1458,17 +1458,6 @@ int unpoison_memory(unsigned long pfn) } if (!get_hwpoison_page(p)) { - /* - * Since HWPoisoned hugepage should have non-zero refcount, - * race between memory failure and unpoison seems to happen. - * In such case unpoison fails and memory failure runs - * to the end. - */ - if (PageHuge(page)) { - unpoison_pr_info("Unpoison: Memory failure is now running on free hugepage %#lx\n", - pfn, &unpoison_rs); - return 0; - } if (TestClearPageHWPoison(p)) num_poisoned_pages_dec(); unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n", -- cgit v1.2.3 From 0348d2ebec9b00ea87b42dffdb3f393007303b82 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 10 Jul 2017 15:47:56 -0700 Subject: mm: hwpoison: introduce idenfity_page_state Factoring duplicate code into a function. Link: http://lkml.kernel.org/r/1496305019-5493-10-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 57 +++++++++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 32 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2aec57c07652..e2e0cb0e1d0f 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1022,9 +1022,31 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, return unmap_success; } -static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags) +static int identify_page_state(unsigned long pfn, struct page *p, + unsigned long page_flags) { struct page_state *ps; + + /* + * The first check uses the current page flags which may not have any + * relevant information. The second check with the saved page flags is + * carried out only if the first check can't determine the page status. + */ + for (ps = error_states;; ps++) + if ((p->flags & ps->mask) == ps->res) + break; + + page_flags |= (p->flags & (1UL << PG_dirty)); + + if (!ps->mask) + for (ps = error_states;; ps++) + if ((page_flags & ps->mask) == ps->res) + break; + return page_action(ps, p, pfn); +} + +static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags) +{ struct page *p = pfn_to_page(pfn); struct page *head = compound_head(p); int res; @@ -1074,19 +1096,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags) goto out; } - res = -EBUSY; - - for (ps = error_states;; ps++) - if ((p->flags & ps->mask) == ps->res) - break; - - page_flags |= (p->flags & (1UL << PG_dirty)); - - if (!ps->mask) - for (ps = error_states;; ps++) - if ((page_flags & ps->mask) == ps->res) - break; - res = page_action(ps, p, pfn); + res = identify_page_state(pfn, p, page_flags); out: unlock_page(head); return res; @@ -1112,7 +1122,6 @@ out: */ int memory_failure(unsigned long pfn, int trapno, int flags) { - struct page_state *ps; struct page *p; struct page *hpage; struct page *orig_head; @@ -1273,23 +1282,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) } identify_page_state: - res = -EBUSY; - /* - * The first check uses the current page flags which may not have any - * relevant information. The second check with the saved page flagss is - * carried out only if the first check can't determine the page status. - */ - for (ps = error_states;; ps++) - if ((p->flags & ps->mask) == ps->res) - break; - - page_flags |= (p->flags & (1UL << PG_dirty)); - - if (!ps->mask) - for (ps = error_states;; ps++) - if ((page_flags & ps->mask) == ps->res) - break; - res = page_action(ps, p, pfn); + res = identify_page_state(pfn, p, page_flags); out: unlock_page(p); return res; -- cgit v1.2.3 From ef77ba5ce6b0e6b657036ee9fc455fc164b215f8 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:49:14 -0700 Subject: mm, hugetlb, soft_offline: use new_page_nodemask for soft offline migration new_page is yet another duplication of the migration callback which has to handle hugetlb migration specially. We can safely use the generic new_page_nodemask for the same purpose. Please note that gigantic hugetlb pages do not need any special handling because alloc_huge_page_nodemask will make sure to check pages in all per node pools. The reason this was done previously was that alloc_huge_page_node treated NO_NUMA_NODE and a specific node differently and so alloc_huge_page_node(nid) would check on this specific node. Link: http://lkml.kernel.org/r/20170622193034.28972-4-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Reported-by: Vlastimil Babka Reviewed-by: Mike Kravetz Tested-by: Mike Kravetz Cc: Naoya Horiguchi Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index e2e0cb0e1d0f..1cd3b3569af8 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1484,16 +1484,8 @@ EXPORT_SYMBOL(unpoison_memory); static struct page *new_page(struct page *p, unsigned long private, int **x) { int nid = page_to_nid(p); - if (PageHuge(p)) { - struct hstate *hstate = page_hstate(compound_head(p)); - if (hstate_is_gigantic(hstate)) - return alloc_huge_page_node(hstate, NUMA_NO_NODE); - - return alloc_huge_page_node(hstate, nid); - } else { - return __alloc_pages_node(nid, GFP_HIGHUSER_MOVABLE, 0); - } + return new_page_nodemask(p, nid, &node_states[N_MEMORY]); } /* -- cgit v1.2.3