ANDROID: mm: page_pinner: introduce failure_tracking feature

CMA allocation can fail because of a temporary page refcount increase
taken through the get_page API as well as through get_user_pages and
friends. However, since get_page is one of the hottest functions, it is
hard to hook get_page and capture a callstack every time due to
performance concerns. Furthermore, get_page can be nested
multiple times, so we cannot track all of the pin sites in the
limited space of page_pinner.

Thus, the approach here is to track the put_page call sites rather
than the get_page ones, once the VM has found that page migration failed.
It is based on two assumptions:

1. Since it is a temporary page refcount, it should be released soon,
   before the dmesg log buffer overflows.
2. A developer can find the matching get_page by reviewing the put_page call site.

By default, it's enabled. If you want to disable it:

  echo 0 > $debugfs/page_pinner/failure_tracking

You can capture the tracking using:

  cat $debugfs/page_pinner/alloc_contig_failed

note: the example below is artificial:

Page pinned ts 386067292 us count 0
PFN 10162530 Block 9924 type Isolate Flags 0x800000000008000c(uptodate|dirty|swapbacked)
 __page_pinner_migration_failed+0x30/0x104
 putback_lru_page+0x90/0xac
 putback_movable_pages+0xc4/0x204
 __alloc_contig_migrate_range+0x290/0x31c
 alloc_contig_range+0x114/0x2bc
 cma_alloc+0x2d8/0x698
 cma_alloc_write+0x58/0xb8
 simple_attr_write+0xd4/0x124
 debugfs_attr_write+0x50/0xd8
 full_proxy_write+0x70/0xf8
 vfs_write+0x168/0x3a8
 ksys_write+0x7c/0xec
 __arm64_sys_write+0x20/0x30
 el0_svc_common+0xa4/0x180
 do_el0_svc+0x28/0x88
 el0_svc+0x14/0x24

Page pinned ts 385867394 us count 0
PFN 10162530 Block 9924 type Isolate Flags 0x800000000008000c(uptodate|dirty|swapbacked)
 __page_pinner_migration_failed+0x30/0x104
 __alloc_contig_migrate_range+0x200/0x31c
 alloc_contig_range+0x114/0x2bc
 cma_alloc+0x2d8/0x698
 cma_alloc_write+0x58/0xb8
 simple_attr_write+0xd4/0x124
 debugfs_attr_write+0x50/0xd8
 full_proxy_write+0x70/0xf8
 vfs_write+0x168/0x3a8
 ksys_write+0x7c/0xec
 __arm64_sys_write+0x20/0x30
 el0_svc_common+0xa4/0x180
 do_el0_svc+0x28/0x88
 el0_svc+0x14/0x24
 el0_sync_handler+0x88/0xec
 el0_sync+0x198/0x1c0

Bug: 183414571
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Minchan Kim <minchan@google.com>
Change-Id: Ie79902c18390eb9f320d823839bb9d9a7fdcdb31
This commit is contained in:
Minchan Kim
2021-03-29 16:48:47 -07:00
committed by Minchan Kim
parent 6e12c5b7d4
commit ddc4a48797
5 changed files with 150 additions and 0 deletions

View File

@@ -1232,6 +1232,8 @@ static inline void put_page(struct page *page)
{ {
page = compound_head(page); page = compound_head(page);
page_pinner_migration_failed(page);
/* /*
* For devmap managed pages we need to catch refcount transition from * For devmap managed pages we need to catch refcount transition from
* 2 to 1, when refcount reach one it means the page is free and we * 2 to 1, when refcount reach one it means the page is free and we

View File

@@ -22,6 +22,8 @@ enum page_ext_flags {
#if defined(CONFIG_PAGE_PINNER) #if defined(CONFIG_PAGE_PINNER)
/* page refcount was increased by GUP or follow_page(FOLL_GET) */ /* page refcount was increased by GUP or follow_page(FOLL_GET) */
PAGE_EXT_GET, PAGE_EXT_GET,
/* page migration failed */
PAGE_EXT_PINNER_MIGRATION_FAILED,
#endif #endif
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
PAGE_EXT_YOUNG, PAGE_EXT_YOUNG,

View File

@@ -6,11 +6,14 @@
#ifdef CONFIG_PAGE_PINNER #ifdef CONFIG_PAGE_PINNER
extern struct static_key_false page_pinner_inited; extern struct static_key_false page_pinner_inited;
extern struct static_key_true failure_tracking;
extern struct page_ext_operations page_pinner_ops; extern struct page_ext_operations page_pinner_ops;
extern void __reset_page_pinner(struct page *page, unsigned int order, bool free); extern void __reset_page_pinner(struct page *page, unsigned int order, bool free);
extern void __set_page_pinner(struct page *page, unsigned int order); extern void __set_page_pinner(struct page *page, unsigned int order);
extern void __dump_page_pinner(struct page *page); extern void __dump_page_pinner(struct page *page);
void __page_pinner_migration_failed(struct page *page);
void __page_pinner_mark_migration_failed_pages(struct list_head *page_list);
static inline void reset_page_pinner(struct page *page, unsigned int order) static inline void reset_page_pinner(struct page *page, unsigned int order)
{ {
@@ -35,6 +38,22 @@ static inline void dump_page_pinner(struct page *page)
if (static_branch_unlikely(&page_pinner_inited)) if (static_branch_unlikely(&page_pinner_inited))
__dump_page_pinner(page); __dump_page_pinner(page);
} }
/*
 * Hook for put_page(): forward to __page_pinner_migration_failed() only
 * while the failure_tracking static key is enabled.
 */
static inline void page_pinner_migration_failed(struct page *page)
{
	if (static_branch_unlikely(&failure_tracking))
		__page_pinner_migration_failed(page);
}
/*
 * Mark every page on @page_list as having failed migration, provided the
 * failure_tracking static key is enabled; otherwise do nothing.
 */
static inline void page_pinner_mark_migration_failed_pages(struct list_head *page_list)
{
	if (static_branch_unlikely(&failure_tracking))
		__page_pinner_mark_migration_failed_pages(page_list);
}
#else #else
static inline void reset_page_pinner(struct page *page, unsigned int order) static inline void reset_page_pinner(struct page *page, unsigned int order)
{ {
@@ -48,5 +67,11 @@ static inline void set_page_pinner(struct page *page, unsigned int order)
static inline void dump_page_pinner(struct page *page) static inline void dump_page_pinner(struct page *page)
{ {
} }
/* No-op stub used when CONFIG_PAGE_PINNER is not set. */
static inline void page_pinner_migration_failed(struct page *page)
{
}
/* No-op stub used when CONFIG_PAGE_PINNER is not set. */
static inline void page_pinner_mark_migration_failed_pages(struct list_head *page_list)
{
}
#endif /* CONFIG_PAGE_PINNER */ #endif /* CONFIG_PAGE_PINNER */
#endif /* __LINUX_PAGE_PINNER_H */ #endif /* __LINUX_PAGE_PINNER_H */

View File

@@ -8608,6 +8608,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
lru_cache_enable(); lru_cache_enable();
if (ret < 0) { if (ret < 0) {
alloc_contig_dump_pages(&cc->migratepages); alloc_contig_dump_pages(&cc->migratepages);
page_pinner_mark_migration_failed_pages(&cc->migratepages);
putback_movable_pages(&cc->migratepages); putback_movable_pages(&cc->migratepages);
return ret; return ret;
} }

View File

@@ -43,9 +43,17 @@ static struct longterm_pinner lt_pinner = {
static s64 threshold_usec = 300000; static s64 threshold_usec = 300000;
/*
 * alloc_contig failed pinner: buffer of records captured for pages that
 * were marked as having failed migration. Accesses to the record array are
 * made under .lock.
 */
static struct longterm_pinner acf_pinner = {
.lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock),
};
static bool page_pinner_enabled; static bool page_pinner_enabled;
DEFINE_STATIC_KEY_FALSE(page_pinner_inited); DEFINE_STATIC_KEY_FALSE(page_pinner_inited);
/*
 * Static key gating migration-failure tracking. It is defined as initially
 * true (tracking on) and is toggled through the failure_tracking debugfs
 * attribute.
 */
DEFINE_STATIC_KEY_TRUE(failure_tracking);
EXPORT_SYMBOL(failure_tracking);
static depot_stack_handle_t failure_handle; static depot_stack_handle_t failure_handle;
static int __init early_page_pinner_param(char *buf) static int __init early_page_pinner_param(char *buf)
@@ -150,6 +158,7 @@ void __reset_page_pinner(struct page *page, unsigned int order, bool free)
if (free) { if (free) {
WARN_ON_ONCE(atomic_read(&page_pinner->count)); WARN_ON_ONCE(atomic_read(&page_pinner->count));
atomic_set(&page_pinner->count, 0); atomic_set(&page_pinner->count, 0);
__clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
} else { } else {
WARN_ON_ONCE(atomic_dec_if_positive( WARN_ON_ONCE(atomic_dec_if_positive(
&page_pinner->count) < 0); &page_pinner->count) < 0);
@@ -289,6 +298,49 @@ void __dump_page_pinner(struct page *page)
} }
} }
void __page_pinner_migration_failed(struct page *page)
{
struct page_ext *page_ext = lookup_page_ext(page);
struct page_pinner *page_pinner;
depot_stack_handle_t handle;
unsigned long flags;
unsigned int idx;
if (unlikely(!page_ext))
return;
page_pinner = get_page_pinner(page_ext);
if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
return;
handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
spin_lock_irqsave(&acf_pinner.lock, flags);
idx = acf_pinner.index++;
acf_pinner.index %= LONTERM_PIN_BUCKETS;
acf_pinner.pinner[idx].handle = handle;
acf_pinner.pinner[idx].ts_usec = ktime_to_us(ktime_get_boottime());
acf_pinner.pinner[idx].page_flags = page->flags;
acf_pinner.pinner[idx].page_mt = get_pageblock_migratetype(page);
acf_pinner.pinner[idx].pfn = page_to_pfn(page);
spin_unlock_irqrestore(&acf_pinner.lock, flags);
}
EXPORT_SYMBOL(__page_pinner_migration_failed);
/*
 * Walk @page_list (linked through page->lru) and set
 * PAGE_EXT_PINNER_MIGRATION_FAILED in the page_ext flags of every page
 * that has a page_ext. Pages without one are skipped.
 */
void __page_pinner_mark_migration_failed_pages(struct list_head *page_list)
{
	struct page *pg;

	list_for_each_entry(pg, page_list, lru) {
		struct page_ext *ext = lookup_page_ext(pg);

		if (unlikely(!ext))
			continue;

		__set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &ext->flags);
	}
}
static ssize_t static ssize_t
read_longterm_page_pinner(struct file *file, char __user *buf, size_t count, read_longterm_page_pinner(struct file *file, char __user *buf, size_t count,
loff_t *ppos) loff_t *ppos)
@@ -327,6 +379,44 @@ static const struct file_operations proc_longterm_pinner_operations = {
.read = read_longterm_page_pinner, .read = read_longterm_page_pinner,
}; };
/*
 * debugfs read handler for the alloc_contig_failed file.
 *
 * Each call emits at most one record and advances *ppos by one, so *ppos
 * acts as a record counter rather than a byte offset. Records are returned
 * newest first.
 *
 * Returns -EINVAL when failure tracking is disabled, 0 (end of file) once
 * LONTERM_PIN_BUCKETS records have been read or an empty slot is reached,
 * and otherwise whatever print_page_pinner() returns.
 */
static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf,
					size_t count, loff_t *ppos)
{
	loff_t i, idx;
	struct captured_pinner record;
	unsigned long flags;

	if (!static_branch_unlikely(&failure_tracking))
		return -EINVAL;

	if (*ppos >= LONTERM_PIN_BUCKETS)
		return 0;

	i = *ppos;
	*ppos = i + 1;

	/*
	 * Read the write index and the slot under the same lock hold so the
	 * slot we copy is consistent with the index we computed it from.
	 * The cast keeps the subtraction in signed arithmetic whatever the
	 * declared type of acf_pinner.index is.
	 */
	spin_lock_irqsave(&acf_pinner.lock, flags);
	/*
	 * reading the records in the reverse order with newest one
	 * being read first followed by older ones
	 */
	idx = ((loff_t)acf_pinner.index - 1 - i + LONTERM_PIN_BUCKETS) %
	       LONTERM_PIN_BUCKETS;
	record = acf_pinner.pinner[idx];
	spin_unlock_irqrestore(&acf_pinner.lock, flags);

	/* A zero handle means the slot has never been filled. */
	if (!record.handle)
		return 0;

	return print_page_pinner(buf, count, record.pfn, record.page_mt,
				 record.page_flags, record.ts_usec,
				 record.handle, 0);
}
/* File operations for the alloc_contig_failed debugfs file; only .read is provided. */
static const struct file_operations proc_alloc_contig_failed_operations = {
.read = read_alloc_contig_failed,
};
static int pp_threshold_set(void *data, unsigned long long val) static int pp_threshold_set(void *data, unsigned long long val)
{ {
unsigned long flags; unsigned long flags;
@@ -350,6 +440,27 @@ static int pp_threshold_get(void *data, unsigned long long *val)
DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get, DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
pp_threshold_set, "%lld\n"); pp_threshold_set, "%lld\n");
/*
 * debugfs setter for failure_tracking: any non-zero @val enables the
 * failure_tracking static key, zero disables it. Always returns 0.
 */
static int failure_tracking_set(void *data, u64 val)
{
	if (!val) {
		static_branch_disable(&failure_tracking);
		return 0;
	}

	static_branch_enable(&failure_tracking);
	return 0;
}
/*
 * debugfs getter for failure_tracking: stores the current state of the
 * failure_tracking static key in *@val. Always returns 0.
 */
static int failure_tracking_get(void *data, u64 *val)
{
	bool enabled = static_branch_unlikely(&failure_tracking);

	*val = enabled;
	return 0;
}
/*
 * debugfs attribute for the failure_tracking toggle, wired to the
 * getter/setter pair and formatted as an unsigned decimal followed by a
 * newline.
 */
DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
failure_tracking_get,
failure_tracking_set, "%llu\n");
static int __init page_pinner_init(void) static int __init page_pinner_init(void)
{ {
struct dentry *pp_debugfs_root; struct dentry *pp_debugfs_root;
@@ -358,6 +469,7 @@ static int __init page_pinner_init(void)
return 0; return 0;
pr_info("page_pinner enabled\n"); pr_info("page_pinner enabled\n");
pp_debugfs_root = debugfs_create_dir("page_pinner", NULL); pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
debugfs_create_file("longterm_pinner", 0400, pp_debugfs_root, NULL, debugfs_create_file("longterm_pinner", 0400, pp_debugfs_root, NULL,
@@ -365,6 +477,14 @@ static int __init page_pinner_init(void)
debugfs_create_file("threshold", 0444, pp_debugfs_root, NULL, debugfs_create_file("threshold", 0444, pp_debugfs_root, NULL,
&pp_threshold_fops); &pp_threshold_fops);
debugfs_create_file("alloc_contig_failed", 0400,
pp_debugfs_root, NULL,
&proc_alloc_contig_failed_operations);
/* Writable mode: failure_tracking_fops has a setter used to toggle tracking. */
debugfs_create_file("failure_tracking", 0644,
		    pp_debugfs_root, NULL,
		    &failure_tracking_fops);
return 0; return 0;
} }
late_initcall(page_pinner_init) late_initcall(page_pinner_init)