mm: soft-offline: close the race against page allocation
A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to allocate a page that was just freed on the way of soft-offline. This is undesirable because soft-offline (which is about corrected error) is less aggressive than hard-offline (which is about uncorrected error), and we can make soft-offline fail and keep using the page for good reason like "system is busy." Two main changes of this patch are: - setting migrate type of the target page to MIGRATE_ISOLATE. As done in free_unref_page_commit(), this makes kernel bypass pcplist when freeing the page. So we can assume that the page is in freelist just after put_page() returns, - setting PG_hwpoison on free page under zone->lock which protects freelists, so this allows us to avoid setting PG_hwpoison on a page that is decided to be allocated soon. [akpm@linux-foundation.org: tweak set_hwpoison_free_buddy_page() comment] Link: http://lkml.kernel.org/r/1531452366-11661-3-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Reported-by: Xishi Qiu <xishi.qiuxishi@alibaba-inc.com> Tested-by: Mike Kravetz <mike.kravetz@oracle.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: <zy.zhengyi@alibaba-inc.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:

committed by
Linus Torvalds

parent
6bc9b56433
commit
d4ae9916ea
@@ -8096,3 +8096,33 @@ bool is_free_buddy_page(struct page *page)
|
||||
|
||||
return order < MAX_ORDER;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_FAILURE
|
||||
/*
|
||||
* Set PG_hwpoison flag if a given page is confirmed to be a free page. This
|
||||
* test is performed under the zone lock to prevent a race against page
|
||||
* allocation.
|
||||
*/
|
||||
bool set_hwpoison_free_buddy_page(struct page *page)
|
||||
{
|
||||
struct zone *zone = page_zone(page);
|
||||
unsigned long pfn = page_to_pfn(page);
|
||||
unsigned long flags;
|
||||
unsigned int order;
|
||||
bool hwpoisoned = false;
|
||||
|
||||
spin_lock_irqsave(&zone->lock, flags);
|
||||
for (order = 0; order < MAX_ORDER; order++) {
|
||||
struct page *page_head = page - (pfn & ((1 << order) - 1));
|
||||
|
||||
if (PageBuddy(page_head) && page_order(page_head) >= order) {
|
||||
if (!TestSetPageHWPoison(page))
|
||||
hwpoisoned = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&zone->lock, flags);
|
||||
|
||||
return hwpoisoned;
|
||||
}
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user