ANDROID: mm: page_pinner: introduce failure_tracking feature
CMA allocation can fail because of a temporary page refcount increase caused by the get_page API as well as get_user_pages and friends. However, since get_page is one of the hottest functions, it is hard to hook get_page to capture the callstack every time due to performance concerns. Furthermore, get_page can be nested multiple times, so we cannot track all of the pin sites in the limited space of page_pinner. Thus, the approach here is to keep track of put_page call sites rather than get_page once the VM has found that page migration failed. It is based on two assumptions: 1. Since it is a temporary page refcount, it should be released soon, before overflowing the dmesg log buffer. 2. A developer can find the matching get_page by reviewing put_page. By default, it is enabled. If you want to disable it: echo 0 > $debugfs/page_pinner/failure_tracking You can capture the tracking using: cat $debugfs/page_pinner/alloc_contig_failed Note: the example below is artificial: Page pinned ts 386067292 us count 0 PFN 10162530 Block 9924 type Isolate Flags 0x800000000008000c(uptodate|dirty|swapbacked) __page_pinner_migration_failed+0x30/0x104 putback_lru_page+0x90/0xac putback_movable_pages+0xc4/0x204 __alloc_contig_migrate_range+0x290/0x31c alloc_contig_range+0x114/0x2bc cma_alloc+0x2d8/0x698 cma_alloc_write+0x58/0xb8 simple_attr_write+0xd4/0x124 debugfs_attr_write+0x50/0xd8 full_proxy_write+0x70/0xf8 vfs_write+0x168/0x3a8 ksys_write+0x7c/0xec __arm64_sys_write+0x20/0x30 el0_svc_common+0xa4/0x180 do_el0_svc+0x28/0x88 el0_svc+0x14/0x24 Page pinned ts 385867394 us count 0 PFN 10162530 Block 9924 type Isolate Flags 0x800000000008000c(uptodate|dirty|swapbacked) __page_pinner_migration_failed+0x30/0x104 __alloc_contig_migrate_range+0x200/0x31c alloc_contig_range+0x114/0x2bc cma_alloc+0x2d8/0x698 cma_alloc_write+0x58/0xb8 simple_attr_write+0xd4/0x124 debugfs_attr_write+0x50/0xd8 full_proxy_write+0x70/0xf8 vfs_write+0x168/0x3a8 ksys_write+0x7c/0xec __arm64_sys_write+0x20/0x30 el0_svc_common+0xa4/0x180 do_el0_svc+0x28/0x88 el0_svc+0x14/0x24
el0_sync_handler+0x88/0xec el0_sync+0x198/0x1c0 Bug: 183414571 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: Ie79902c18390eb9f320d823839bb9d9a7fdcdb31
This commit is contained in:
@@ -1232,6 +1232,8 @@ static inline void put_page(struct page *page)
|
|||||||
{
|
{
|
||||||
page = compound_head(page);
|
page = compound_head(page);
|
||||||
|
|
||||||
|
page_pinner_migration_failed(page);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For devmap managed pages we need to catch refcount transition from
|
* For devmap managed pages we need to catch refcount transition from
|
||||||
* 2 to 1, when refcount reach one it means the page is free and we
|
* 2 to 1, when refcount reach one it means the page is free and we
|
||||||
|
@@ -22,6 +22,8 @@ enum page_ext_flags {
|
|||||||
#if defined(CONFIG_PAGE_PINNER)
|
#if defined(CONFIG_PAGE_PINNER)
|
||||||
/* page refcount was increased by GUP or follow_page(FOLL_GET) */
|
/* page refcount was increased by GUP or follow_page(FOLL_GET) */
|
||||||
PAGE_EXT_GET,
|
PAGE_EXT_GET,
|
||||||
|
/* page migration failed */
|
||||||
|
PAGE_EXT_PINNER_MIGRATION_FAILED,
|
||||||
#endif
|
#endif
|
||||||
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
|
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
|
||||||
PAGE_EXT_YOUNG,
|
PAGE_EXT_YOUNG,
|
||||||
|
@@ -6,11 +6,14 @@
|
|||||||
|
|
||||||
#ifdef CONFIG_PAGE_PINNER
|
#ifdef CONFIG_PAGE_PINNER
|
||||||
extern struct static_key_false page_pinner_inited;
|
extern struct static_key_false page_pinner_inited;
|
||||||
|
extern struct static_key_true failure_tracking;
|
||||||
extern struct page_ext_operations page_pinner_ops;
|
extern struct page_ext_operations page_pinner_ops;
|
||||||
|
|
||||||
extern void __reset_page_pinner(struct page *page, unsigned int order, bool free);
|
extern void __reset_page_pinner(struct page *page, unsigned int order, bool free);
|
||||||
extern void __set_page_pinner(struct page *page, unsigned int order);
|
extern void __set_page_pinner(struct page *page, unsigned int order);
|
||||||
extern void __dump_page_pinner(struct page *page);
|
extern void __dump_page_pinner(struct page *page);
|
||||||
|
void __page_pinner_migration_failed(struct page *page);
|
||||||
|
void __page_pinner_mark_migration_failed_pages(struct list_head *page_list);
|
||||||
|
|
||||||
static inline void reset_page_pinner(struct page *page, unsigned int order)
|
static inline void reset_page_pinner(struct page *page, unsigned int order)
|
||||||
{
|
{
|
||||||
@@ -35,6 +38,22 @@ static inline void dump_page_pinner(struct page *page)
|
|||||||
if (static_branch_unlikely(&page_pinner_inited))
|
if (static_branch_unlikely(&page_pinner_inited))
|
||||||
__dump_page_pinner(page);
|
__dump_page_pinner(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void page_pinner_migration_failed(struct page *page)
|
||||||
|
{
|
||||||
|
if (!static_branch_unlikely(&failure_tracking))
|
||||||
|
return;
|
||||||
|
|
||||||
|
__page_pinner_migration_failed(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void page_pinner_mark_migration_failed_pages(struct list_head *page_list)
|
||||||
|
{
|
||||||
|
if (!static_branch_unlikely(&failure_tracking))
|
||||||
|
return;
|
||||||
|
|
||||||
|
__page_pinner_mark_migration_failed_pages(page_list);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
static inline void reset_page_pinner(struct page *page, unsigned int order)
|
static inline void reset_page_pinner(struct page *page, unsigned int order)
|
||||||
{
|
{
|
||||||
@@ -48,5 +67,11 @@ static inline void set_page_pinner(struct page *page, unsigned int order)
|
|||||||
static inline void dump_page_pinner(struct page *page)
|
static inline void dump_page_pinner(struct page *page)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
/* !CONFIG_PAGE_PINNER: failure tracking is compiled out, nothing to do. */
static inline void page_pinner_migration_failed(struct page *page) { }
|
||||||
|
/* !CONFIG_PAGE_PINNER: no pages are ever marked, nothing to do. */
static inline void page_pinner_mark_migration_failed_pages(struct list_head *page_list) { }
|
||||||
#endif /* CONFIG_PAGE_PINNER */
|
#endif /* CONFIG_PAGE_PINNER */
|
||||||
#endif /* __LINUX_PAGE_PINNER_H */
|
#endif /* __LINUX_PAGE_PINNER_H */
|
||||||
|
@@ -8608,6 +8608,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
|
|||||||
lru_cache_enable();
|
lru_cache_enable();
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
alloc_contig_dump_pages(&cc->migratepages);
|
alloc_contig_dump_pages(&cc->migratepages);
|
||||||
|
page_pinner_mark_migration_failed_pages(&cc->migratepages);
|
||||||
putback_movable_pages(&cc->migratepages);
|
putback_movable_pages(&cc->migratepages);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
120
mm/page_pinner.c
120
mm/page_pinner.c
@@ -43,9 +43,17 @@ static struct longterm_pinner lt_pinner = {
|
|||||||
|
|
||||||
static s64 threshold_usec = 300000;
|
static s64 threshold_usec = 300000;
|
||||||
|
|
||||||
|
/* alloc_contig failed pinner */
|
||||||
|
static struct longterm_pinner acf_pinner = {
|
||||||
|
.lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock),
|
||||||
|
};
|
||||||
|
|
||||||
static bool page_pinner_enabled;
|
static bool page_pinner_enabled;
|
||||||
DEFINE_STATIC_KEY_FALSE(page_pinner_inited);
|
DEFINE_STATIC_KEY_FALSE(page_pinner_inited);
|
||||||
|
|
||||||
|
DEFINE_STATIC_KEY_TRUE(failure_tracking);
|
||||||
|
EXPORT_SYMBOL(failure_tracking);
|
||||||
|
|
||||||
static depot_stack_handle_t failure_handle;
|
static depot_stack_handle_t failure_handle;
|
||||||
|
|
||||||
static int __init early_page_pinner_param(char *buf)
|
static int __init early_page_pinner_param(char *buf)
|
||||||
@@ -150,6 +158,7 @@ void __reset_page_pinner(struct page *page, unsigned int order, bool free)
|
|||||||
if (free) {
|
if (free) {
|
||||||
WARN_ON_ONCE(atomic_read(&page_pinner->count));
|
WARN_ON_ONCE(atomic_read(&page_pinner->count));
|
||||||
atomic_set(&page_pinner->count, 0);
|
atomic_set(&page_pinner->count, 0);
|
||||||
|
__clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
|
||||||
} else {
|
} else {
|
||||||
WARN_ON_ONCE(atomic_dec_if_positive(
|
WARN_ON_ONCE(atomic_dec_if_positive(
|
||||||
&page_pinner->count) < 0);
|
&page_pinner->count) < 0);
|
||||||
@@ -289,6 +298,49 @@ void __dump_page_pinner(struct page *page)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void __page_pinner_migration_failed(struct page *page)
|
||||||
|
{
|
||||||
|
struct page_ext *page_ext = lookup_page_ext(page);
|
||||||
|
struct page_pinner *page_pinner;
|
||||||
|
depot_stack_handle_t handle;
|
||||||
|
unsigned long flags;
|
||||||
|
unsigned int idx;
|
||||||
|
|
||||||
|
if (unlikely(!page_ext))
|
||||||
|
return;
|
||||||
|
|
||||||
|
page_pinner = get_page_pinner(page_ext);
|
||||||
|
if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
|
||||||
|
return;
|
||||||
|
|
||||||
|
handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
|
||||||
|
|
||||||
|
spin_lock_irqsave(&acf_pinner.lock, flags);
|
||||||
|
idx = acf_pinner.index++;
|
||||||
|
acf_pinner.index %= LONTERM_PIN_BUCKETS;
|
||||||
|
|
||||||
|
acf_pinner.pinner[idx].handle = handle;
|
||||||
|
acf_pinner.pinner[idx].ts_usec = ktime_to_us(ktime_get_boottime());
|
||||||
|
acf_pinner.pinner[idx].page_flags = page->flags;
|
||||||
|
acf_pinner.pinner[idx].page_mt = get_pageblock_migratetype(page);
|
||||||
|
acf_pinner.pinner[idx].pfn = page_to_pfn(page);
|
||||||
|
spin_unlock_irqrestore(&acf_pinner.lock, flags);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(__page_pinner_migration_failed);
|
||||||
|
|
||||||
|
void __page_pinner_mark_migration_failed_pages(struct list_head *page_list)
|
||||||
|
{
|
||||||
|
struct page *page;
|
||||||
|
struct page_ext *page_ext;
|
||||||
|
|
||||||
|
list_for_each_entry(page, page_list, lru) {
|
||||||
|
page_ext = lookup_page_ext(page);
|
||||||
|
if (unlikely(!page_ext))
|
||||||
|
continue;
|
||||||
|
__set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static ssize_t
|
static ssize_t
|
||||||
read_longterm_page_pinner(struct file *file, char __user *buf, size_t count,
|
read_longterm_page_pinner(struct file *file, char __user *buf, size_t count,
|
||||||
loff_t *ppos)
|
loff_t *ppos)
|
||||||
@@ -327,6 +379,44 @@ static const struct file_operations proc_longterm_pinner_operations = {
|
|||||||
.read = read_longterm_page_pinner,
|
.read = read_longterm_page_pinner,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Read handler for the "alloc_contig_failed" debugfs file.
 *
 * Each call emits at most one captured record. *ppos counts how many
 * records have been consumed so far, so successive reads walk the
 * acf_pinner ring from the newest entry towards older ones.
 *
 * Returns -EINVAL while failure_tracking is disabled, 0 once
 * LONTERM_PIN_BUCKETS records have been read or a slot with a zero stack
 * handle is reached, otherwise the result of print_page_pinner().
 */
static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf,
					size_t count, loff_t *ppos)
{
	struct captured_pinner record;
	unsigned long flags;
	loff_t i, idx;

	if (!static_branch_unlikely(&failure_tracking))
		return -EINVAL;

	if (*ppos >= LONTERM_PIN_BUCKETS)
		return 0;

	i = *ppos;
	*ppos = i + 1;

	/*
	 * reading the records in the reverse order with newest one
	 * being read first followed by older ones
	 *
	 * The ring index is sampled under the lock: the writer updates it in
	 * two steps (increment, then modulo) while holding acf_pinner.lock,
	 * so reading it unlocked could pair a stale or transient index with
	 * the slot copied below.
	 */
	spin_lock_irqsave(&acf_pinner.lock, flags);
	idx = (acf_pinner.index - 1 - i + LONTERM_PIN_BUCKETS) %
	      LONTERM_PIN_BUCKETS;
	record = acf_pinner.pinner[idx];
	spin_unlock_irqrestore(&acf_pinner.lock, flags);

	/* A zero handle is treated as end of data. */
	if (!record.handle)
		return 0;

	return print_page_pinner(buf, count, record.pfn, record.page_mt,
				 record.page_flags, record.ts_usec,
				 record.handle, 0);
}
|
||||||
|
|
||||||
|
static const struct file_operations proc_alloc_contig_failed_operations = {
|
||||||
|
.read = read_alloc_contig_failed,
|
||||||
|
};
|
||||||
|
|
||||||
static int pp_threshold_set(void *data, unsigned long long val)
|
static int pp_threshold_set(void *data, unsigned long long val)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
@@ -350,6 +440,27 @@ static int pp_threshold_get(void *data, unsigned long long *val)
|
|||||||
DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
|
DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
|
||||||
pp_threshold_set, "%lld\n");
|
pp_threshold_set, "%lld\n");
|
||||||
|
|
||||||
|
/*
 * debugfs setter for "failure_tracking": any non-zero value enables the
 * failure_tracking static key, zero disables it. Always returns 0.
 */
static int failure_tracking_set(void *data, u64 val)
{
	if (val)
		static_branch_enable(&failure_tracking);
	else
		static_branch_disable(&failure_tracking);

	return 0;
}
|
||||||
|
|
||||||
|
/*
 * debugfs getter for "failure_tracking": stores 1 in *val when the
 * failure_tracking static key is enabled and 0 otherwise. Always returns 0.
 */
static int failure_tracking_get(void *data, u64 *val)
{
	if (static_branch_unlikely(&failure_tracking))
		*val = 1;
	else
		*val = 0;

	return 0;
}
|
||||||
|
DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
|
||||||
|
failure_tracking_get,
|
||||||
|
failure_tracking_set, "%llu\n");
|
||||||
|
|
||||||
static int __init page_pinner_init(void)
|
static int __init page_pinner_init(void)
|
||||||
{
|
{
|
||||||
struct dentry *pp_debugfs_root;
|
struct dentry *pp_debugfs_root;
|
||||||
@@ -358,6 +469,7 @@ static int __init page_pinner_init(void)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
pr_info("page_pinner enabled\n");
|
pr_info("page_pinner enabled\n");
|
||||||
|
|
||||||
pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
|
pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
|
||||||
|
|
||||||
debugfs_create_file("longterm_pinner", 0400, pp_debugfs_root, NULL,
|
debugfs_create_file("longterm_pinner", 0400, pp_debugfs_root, NULL,
|
||||||
@@ -365,6 +477,14 @@ static int __init page_pinner_init(void)
|
|||||||
|
|
||||||
debugfs_create_file("threshold", 0444, pp_debugfs_root, NULL,
|
debugfs_create_file("threshold", 0444, pp_debugfs_root, NULL,
|
||||||
&pp_threshold_fops);
|
&pp_threshold_fops);
|
||||||
|
|
||||||
|
debugfs_create_file("alloc_contig_failed", 0400,
|
||||||
|
pp_debugfs_root, NULL,
|
||||||
|
&proc_alloc_contig_failed_operations);
|
||||||
|
|
||||||
|
debugfs_create_file("failure_tracking", 0444,
|
||||||
|
pp_debugfs_root, NULL,
|
||||||
|
&failure_tracking_fops);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
late_initcall(page_pinner_init)
|
late_initcall(page_pinner_init)
|
||||||
|
Reference in New Issue
Block a user