From ddc4a48797352076586ef3ab79c6bfaba08fd06e Mon Sep 17 00:00:00 2001
From: Minchan Kim
Date: Mon, 29 Mar 2021 16:48:47 -0700
Subject: [PATCH] ANDROID: mm: page_pinner: introduce failure_tracking feature

CMA allocation can fail because of a temporary page refcount increase
taken through the get_page API as well as through get_user_pages and
friends. However, since get_page is one of the hottest functions, it is
hard to hook get_page to capture a callstack every time due to
performance concerns. Furthermore, get_page can be nested multiple
times, so we cannot track all of the pin sites in the limited space of
page_pinner.

Thus, the approach here is to keep track of the put_page callsites
rather than get_page once the VM has found that page migration failed.
It is based on two assumptions:

1. Since it is a temporary page refcount, it should be released soon,
   before overflowing the dmesg log buffer.
2. A developer can find the matching get_page by reviewing put_page.

By default, it is enabled. If you want to disable it:

  echo 0 > $debugfs/page_pinner/failure_tracking

You can capture the tracking using:

  cat $debugfs/page_pinner/alloc_contig_failed

note: the example below is artificial:

Page pinned ts 386067292 us count 0
PFN 10162530 Block 9924 type Isolate Flags 0x800000000008000c(uptodate|dirty|swapbacked)
__page_pinner_migration_failed+0x30/0x104
putback_lru_page+0x90/0xac
putback_movable_pages+0xc4/0x204
__alloc_contig_migrate_range+0x290/0x31c
alloc_contig_range+0x114/0x2bc
cma_alloc+0x2d8/0x698
cma_alloc_write+0x58/0xb8
simple_attr_write+0xd4/0x124
debugfs_attr_write+0x50/0xd8
full_proxy_write+0x70/0xf8
vfs_write+0x168/0x3a8
ksys_write+0x7c/0xec
__arm64_sys_write+0x20/0x30
el0_svc_common+0xa4/0x180
do_el0_svc+0x28/0x88
el0_svc+0x14/0x24

Page pinned ts 385867394 us count 0
PFN 10162530 Block 9924 type Isolate Flags 0x800000000008000c(uptodate|dirty|swapbacked)
__page_pinner_migration_failed+0x30/0x104
__alloc_contig_migrate_range+0x200/0x31c
alloc_contig_range+0x114/0x2bc
cma_alloc+0x2d8/0x698
cma_alloc_write+0x58/0xb8
simple_attr_write+0xd4/0x124
debugfs_attr_write+0x50/0xd8
full_proxy_write+0x70/0xf8
vfs_write+0x168/0x3a8
ksys_write+0x7c/0xec
__arm64_sys_write+0x20/0x30
el0_svc_common+0xa4/0x180
do_el0_svc+0x28/0x88
el0_svc+0x14/0x24
el0_sync_handler+0x88/0xec
el0_sync+0x198/0x1c0

Bug: 183414571
Signed-off-by: Minchan Kim
Signed-off-by: Minchan Kim
Change-Id: Ie79902c18390eb9f320d823839bb9d9a7fdcdb31
---
 include/linux/mm.h          |   2 +
 include/linux/page_ext.h    |   2 +
 include/linux/page_pinner.h |  25 +++++++
 mm/page_alloc.c             |   1 +
 mm/page_pinner.c            | 136 ++++++++++++++++++++++++++++++++++++
 5 files changed, 166 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ed171ec53a0a..1883e5922114 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1232,6 +1232,8 @@ static inline void put_page(struct page *page)
 {
 	page = compound_head(page);
 
+	page_pinner_migration_failed(page);
+
 	/*
 	 * For devmap managed pages we need to catch refcount transition from
 	 * 2 to 1, when refcount reach one it means the page is free and we
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index ce55d8f6bc27..cd45c1927d90 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -22,6 +22,8 @@ enum page_ext_flags {
 #if defined(CONFIG_PAGE_PINNER)
 	/* page refcount was increased by GUP or follow_page(FOLL_GET) */
 	PAGE_EXT_GET,
+	/* page migration failed */
+	PAGE_EXT_PINNER_MIGRATION_FAILED,
 #endif
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
 	PAGE_EXT_YOUNG,
diff --git a/include/linux/page_pinner.h b/include/linux/page_pinner.h
index 013e3bcb40e8..e03ff271bea5 100644
--- a/include/linux/page_pinner.h
+++ b/include/linux/page_pinner.h
@@ -6,11 +6,14 @@
 
 #ifdef CONFIG_PAGE_PINNER
 extern struct static_key_false page_pinner_inited;
+extern struct static_key_true failure_tracking;
 extern struct page_ext_operations page_pinner_ops;
 
 extern void __reset_page_pinner(struct page *page, unsigned int order, bool free);
 extern void __set_page_pinner(struct page *page, unsigned int order);
 extern void __dump_page_pinner(struct page *page);
+void __page_pinner_migration_failed(struct page *page);
+void __page_pinner_mark_migration_failed_pages(struct list_head *page_list);
 
 static inline void reset_page_pinner(struct page *page, unsigned int order)
 {
@@ -35,6 +38,22 @@ static inline void dump_page_pinner(struct page *page)
 	if (static_branch_unlikely(&page_pinner_inited))
 		__dump_page_pinner(page);
 }
+
+static inline void page_pinner_migration_failed(struct page *page)
+{
+	if (!static_branch_unlikely(&failure_tracking))
+		return;
+
+	__page_pinner_migration_failed(page);
+}
+
+static inline void page_pinner_mark_migration_failed_pages(struct list_head *page_list)
+{
+	if (!static_branch_unlikely(&failure_tracking))
+		return;
+
+	__page_pinner_mark_migration_failed_pages(page_list);
+}
 #else
 static inline void reset_page_pinner(struct page *page, unsigned int order)
 {
@@ -48,5 +67,11 @@ static inline void set_page_pinner(struct page *page, unsigned int order)
 static inline void dump_page_pinner(struct page *page)
 {
 }
+static inline void page_pinner_migration_failed(struct page *page)
+{
+}
+static inline void page_pinner_mark_migration_failed_pages(struct list_head *page_list)
+{
+}
 #endif /* CONFIG_PAGE_PINNER */
 #endif /* __LINUX_PAGE_PINNER_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bbe03f36406c..9b5ec5c9bc70 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8608,6 +8608,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 	lru_cache_enable();
 	if (ret < 0) {
 		alloc_contig_dump_pages(&cc->migratepages);
+		page_pinner_mark_migration_failed_pages(&cc->migratepages);
 		putback_movable_pages(&cc->migratepages);
 		return ret;
 	}
diff --git a/mm/page_pinner.c b/mm/page_pinner.c
index f48a3d2554ef..9bf41de47e9a 100644
--- a/mm/page_pinner.c
+++ b/mm/page_pinner.c
@@ -43,9 +43,17 @@ static struct longterm_pinner lt_pinner = {
 
 static s64 threshold_usec = 300000;
 
+/* put_page() call sites of pages that failed alloc_contig migration */
+static struct longterm_pinner acf_pinner = {
+	.lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock),
+};
+
 static bool page_pinner_enabled;
 DEFINE_STATIC_KEY_FALSE(page_pinner_inited);
 
+DEFINE_STATIC_KEY_TRUE(failure_tracking);
+EXPORT_SYMBOL(failure_tracking);
+
 static depot_stack_handle_t failure_handle;
 
 static int __init early_page_pinner_param(char *buf)
@@ -150,6 +158,7 @@ void __reset_page_pinner(struct page *page, unsigned int order, bool free)
 		if (free) {
 			WARN_ON_ONCE(atomic_read(&page_pinner->count));
 			atomic_set(&page_pinner->count, 0);
+			__clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
 		} else {
 			WARN_ON_ONCE(atomic_dec_if_positive(
 						&page_pinner->count) < 0);
@@ -289,6 +298,58 @@ void __dump_page_pinner(struct page *page)
 	}
 }
 
+/*
+ * Called from put_page() while failure_tracking is enabled. If the page was
+ * marked by __page_pinner_mark_migration_failed_pages(), store the caller's
+ * stack with a timestamp and the page's pfn, flags and migratetype in the
+ * next acf_pinner slot; index wraps, so the oldest record is overwritten.
+ */
+void __page_pinner_migration_failed(struct page *page)
+{
+	struct page_ext *page_ext = lookup_page_ext(page);
+	depot_stack_handle_t handle;
+	unsigned long flags;
+	unsigned int idx;
+
+	if (unlikely(!page_ext))
+		return;
+
+	if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
+		return;
+
+	/* save_stack() takes gfp flags; call it before taking the lock */
+	handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
+
+	spin_lock_irqsave(&acf_pinner.lock, flags);
+	idx = acf_pinner.index++;
+	acf_pinner.index %= LONTERM_PIN_BUCKETS;
+
+	acf_pinner.pinner[idx].handle = handle;
+	acf_pinner.pinner[idx].ts_usec = ktime_to_us(ktime_get_boottime());
+	acf_pinner.pinner[idx].page_flags = page->flags;
+	acf_pinner.pinner[idx].page_mt = get_pageblock_migratetype(page);
+	acf_pinner.pinner[idx].pfn = page_to_pfn(page);
+	spin_unlock_irqrestore(&acf_pinner.lock, flags);
+}
+EXPORT_SYMBOL(__page_pinner_migration_failed);
+
+/*
+ * Set PAGE_EXT_PINNER_MIGRATION_FAILED on every page of @page_list that has
+ * a page_ext, so that __page_pinner_migration_failed() records it later.
+ */
+void __page_pinner_mark_migration_failed_pages(struct list_head *page_list)
+{
+	struct page *page;
+	struct page_ext *page_ext;
+
+	list_for_each_entry(page, page_list, lru) {
+		page_ext = lookup_page_ext(page);
+		if (unlikely(!page_ext))
+			continue;
+		__set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
+	}
+}
+
 static ssize_t
 read_longterm_page_pinner(struct file *file, char __user *buf, size_t count,
 			  loff_t *ppos)
@@ -327,6 +388,50 @@ static const struct file_operations proc_longterm_pinner_operations = {
 	.read = read_longterm_page_pinner,
 };
 
+/*
+ * debugfs read handler for alloc_contig_failed. Each call emits one record,
+ * newest first; *ppos counts the records already returned. Returns 0 after
+ * LONTERM_PIN_BUCKETS records or when the slot has no stack handle, and
+ * -EINVAL while failure_tracking is disabled.
+ */
+static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	loff_t i, idx;
+	struct captured_pinner record;
+	unsigned long flags;
+
+	if (!static_branch_unlikely(&failure_tracking))
+		return -EINVAL;
+
+	if (*ppos >= LONTERM_PIN_BUCKETS)
+		return 0;
+
+	i = *ppos;
+	*ppos = i + 1;
+
+	spin_lock_irqsave(&acf_pinner.lock, flags);
+	/*
+	 * Read in reverse order, newest record first. The index is sampled
+	 * under the lock so it is consistent with the record copied below.
+	 */
+	idx = (acf_pinner.index - 1 - i + LONTERM_PIN_BUCKETS) %
+	       LONTERM_PIN_BUCKETS;
+	record = acf_pinner.pinner[idx];
+	spin_unlock_irqrestore(&acf_pinner.lock, flags);
+
+	if (!record.handle)
+		return 0;
+
+	return print_page_pinner(buf, count, record.pfn, record.page_mt,
+				 record.page_flags, record.ts_usec,
+				 record.handle, 0);
+}
+
+static const struct file_operations proc_alloc_contig_failed_operations = {
+	.read = read_alloc_contig_failed,
+};
+
 static int pp_threshold_set(void *data, unsigned long long val)
 {
 	unsigned long flags;
@@ -350,6 +455,28 @@ static int pp_threshold_get(void *data, unsigned long long *val)
 DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
 			 pp_threshold_set, "%lld\n");
 
+/* failure_tracking knob: non-zero enables the static key, 0 disables it */
+static int failure_tracking_set(void *data, u64 val)
+{
+	bool on;
+
+	on = (bool)val;
+	if (on)
+		static_branch_enable(&failure_tracking);
+	else
+		static_branch_disable(&failure_tracking);
+	return 0;
+}
+
+static int failure_tracking_get(void *data, u64 *val)
+{
+	*val = static_branch_unlikely(&failure_tracking);
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
+			 failure_tracking_get,
+			 failure_tracking_set, "%llu\n");
+
 static int __init page_pinner_init(void)
 {
 	struct dentry *pp_debugfs_root;
@@ -358,6 +485,7 @@ static int __init page_pinner_init(void)
 		return 0;
 
 	pr_info("page_pinner enabled\n");
+
 	pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
 
 	debugfs_create_file("longterm_pinner", 0400, pp_debugfs_root, NULL,
@@ -365,6 +493,14 @@ static int __init page_pinner_init(void)
 
 	debugfs_create_file("threshold", 0444, pp_debugfs_root, NULL,
 			    &pp_threshold_fops);
+
+	debugfs_create_file("alloc_contig_failed", 0400,
+			    pp_debugfs_root, NULL,
+			    &proc_alloc_contig_failed_operations);
+
+	debugfs_create_file("failure_tracking", 0644,
+			    pp_debugfs_root, NULL,
+			    &failure_tracking_fops);
 	return 0;
 }
 late_initcall(page_pinner_init)