diff --git a/include/linux/mm.h b/include/linux/mm.h
index ed171ec53a0a..1883e5922114 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1232,6 +1232,8 @@ static inline void put_page(struct page *page)
 {
 	page = compound_head(page);
 
+	page_pinner_migration_failed(page);
+
 	/*
 	 * For devmap managed pages we need to catch refcount transition from
 	 * 2 to 1, when refcount reach one it means the page is free and we
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index ce55d8f6bc27..cd45c1927d90 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -22,6 +22,8 @@ enum page_ext_flags {
 #if defined(CONFIG_PAGE_PINNER)
 	/* page refcount was increased by GUP or follow_page(FOLL_GET) */
 	PAGE_EXT_GET,
+	/* page migration failed */
+	PAGE_EXT_PINNER_MIGRATION_FAILED,
 #endif
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
 	PAGE_EXT_YOUNG,
diff --git a/include/linux/page_pinner.h b/include/linux/page_pinner.h
index 013e3bcb40e8..e03ff271bea5 100644
--- a/include/linux/page_pinner.h
+++ b/include/linux/page_pinner.h
@@ -6,11 +6,14 @@
 #ifdef CONFIG_PAGE_PINNER
 extern struct static_key_false page_pinner_inited;
+extern struct static_key_true failure_tracking;
 extern struct page_ext_operations page_pinner_ops;
 extern void __reset_page_pinner(struct page *page, unsigned int order,
 				bool free);
 extern void __set_page_pinner(struct page *page, unsigned int order);
 extern void __dump_page_pinner(struct page *page);
+void __page_pinner_migration_failed(struct page *page);
+void __page_pinner_mark_migration_failed_pages(struct list_head *page_list);
 
 static inline void reset_page_pinner(struct page *page, unsigned int order)
 {
@@ -35,6 +38,22 @@ static inline void dump_page_pinner(struct page *page)
 	if (static_branch_unlikely(&page_pinner_inited))
 		__dump_page_pinner(page);
 }
+
+static inline void page_pinner_migration_failed(struct page *page)
+{
+	if (!static_branch_unlikely(&failure_tracking))
+		return;
+
+	__page_pinner_migration_failed(page);
+}
+
+static inline void page_pinner_mark_migration_failed_pages(struct list_head *page_list)
+{
+	if (!static_branch_unlikely(&failure_tracking))
+		return;
+
+	__page_pinner_mark_migration_failed_pages(page_list);
+}
 #else
 static inline void reset_page_pinner(struct page *page, unsigned int order)
 {
@@ -48,5 +67,11 @@ static inline void set_page_pinner(struct page *page, unsigned int order)
 static inline void dump_page_pinner(struct page *page)
 {
 }
+static inline void page_pinner_migration_failed(struct page *page)
+{
+}
+static inline void page_pinner_mark_migration_failed_pages(struct list_head *page_list)
+{
+}
 #endif /* CONFIG_PAGE_PINNER */
 #endif /* __LINUX_PAGE_PINNER_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bbe03f36406c..9b5ec5c9bc70 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8608,6 +8608,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 	lru_cache_enable();
 	if (ret < 0) {
 		alloc_contig_dump_pages(&cc->migratepages);
+		page_pinner_mark_migration_failed_pages(&cc->migratepages);
 		putback_movable_pages(&cc->migratepages);
 		return ret;
 	}
diff --git a/mm/page_pinner.c b/mm/page_pinner.c
index f48a3d2554ef..9bf41de47e9a 100644
--- a/mm/page_pinner.c
+++ b/mm/page_pinner.c
@@ -43,9 +43,17 @@
 static struct longterm_pinner lt_pinner = {
 	.lock = __SPIN_LOCK_UNLOCKED(lt_pinner.lock),
 };
 
 static s64 threshold_usec = 300000;
 
+/* alloc_contig failed pinner */
+static struct longterm_pinner acf_pinner = {
+	.lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock),
+};
+
 static bool page_pinner_enabled;
 DEFINE_STATIC_KEY_FALSE(page_pinner_inited);
+DEFINE_STATIC_KEY_TRUE(failure_tracking);
+EXPORT_SYMBOL(failure_tracking);
+
 static depot_stack_handle_t failure_handle;
 
 static int __init early_page_pinner_param(char *buf)
@@ -150,6 +158,7 @@ void __reset_page_pinner(struct page *page, unsigned int order, bool free)
 		if (free) {
 			WARN_ON_ONCE(atomic_read(&page_pinner->count));
 			atomic_set(&page_pinner->count, 0);
+			__clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
 		} else {
 			WARN_ON_ONCE(atomic_dec_if_positive(
 						&page_pinner->count) < 0);
@@ -289,6 +298,49 @@ void __dump_page_pinner(struct page *page)
 	}
 }
 
+void __page_pinner_migration_failed(struct page *page)
+{
+	struct page_ext *page_ext = lookup_page_ext(page);
+	struct page_pinner *page_pinner;
+	depot_stack_handle_t handle;
+	unsigned long flags;
+	unsigned int idx;
+
+	if (unlikely(!page_ext))
+		return;
+
+	page_pinner = get_page_pinner(page_ext);
+	if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
+		return;
+
+	handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
+
+	spin_lock_irqsave(&acf_pinner.lock, flags);
+	idx = acf_pinner.index++;
+	acf_pinner.index %= LONTERM_PIN_BUCKETS;
+
+	acf_pinner.pinner[idx].handle = handle;
+	acf_pinner.pinner[idx].ts_usec = ktime_to_us(ktime_get_boottime());
+	acf_pinner.pinner[idx].page_flags = page->flags;
+	acf_pinner.pinner[idx].page_mt = get_pageblock_migratetype(page);
+	acf_pinner.pinner[idx].pfn = page_to_pfn(page);
+	spin_unlock_irqrestore(&acf_pinner.lock, flags);
+}
+EXPORT_SYMBOL(__page_pinner_migration_failed);
+
+void __page_pinner_mark_migration_failed_pages(struct list_head *page_list)
+{
+	struct page *page;
+	struct page_ext *page_ext;
+
+	list_for_each_entry(page, page_list, lru) {
+		page_ext = lookup_page_ext(page);
+		if (unlikely(!page_ext))
+			continue;
+		__set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
+	}
+}
+
 static ssize_t read_longterm_page_pinner(struct file *file, char __user *buf,
 					 size_t count, loff_t *ppos)
 {
@@ -327,6 +379,44 @@ static const struct file_operations proc_longterm_pinner_operations = {
 	.read = read_longterm_page_pinner,
 };
 
+static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	loff_t i, idx;
+	struct captured_pinner record;
+	unsigned long flags;
+
+	if (!static_branch_unlikely(&failure_tracking))
+		return -EINVAL;
+
+	if (*ppos >= LONTERM_PIN_BUCKETS)
+		return 0;
+
+	i = *ppos;
+	*ppos = i + 1;
+
+	/*
+	 * reading the records in the reverse order with newest one
+	 * being read first followed by older ones
+	 */
+	idx = (acf_pinner.index - 1 - i + LONTERM_PIN_BUCKETS) %
+	      LONTERM_PIN_BUCKETS;
+
+	spin_lock_irqsave(&acf_pinner.lock, flags);
+	record = acf_pinner.pinner[idx];
+	spin_unlock_irqrestore(&acf_pinner.lock, flags);
+	if (!record.handle)
+		return 0;
+
+	return print_page_pinner(buf, count, record.pfn, record.page_mt,
+				 record.page_flags, record.ts_usec,
+				 record.handle, 0);
+}
+
+static const struct file_operations proc_alloc_contig_failed_operations = {
+	.read = read_alloc_contig_failed,
+};
+
 static int pp_threshold_set(void *data, unsigned long long val)
 {
 	unsigned long flags;
@@ -350,6 +440,27 @@ static int pp_threshold_get(void *data, unsigned long long *val)
 DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get,
 			 pp_threshold_set, "%lld\n");
 
+static int failure_tracking_set(void *data, u64 val)
+{
+	bool on;
+
+	on = (bool)val;
+	if (on)
+		static_branch_enable(&failure_tracking);
+	else
+		static_branch_disable(&failure_tracking);
+	return 0;
+}
+
+static int failure_tracking_get(void *data, u64 *val)
+{
+	*val = static_branch_unlikely(&failure_tracking);
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
+			 failure_tracking_get,
+			 failure_tracking_set, "%llu\n");
+
 static int __init page_pinner_init(void)
 {
 	struct dentry *pp_debugfs_root;
@@ -358,6 +469,7 @@ static int __init page_pinner_init(void)
 		return 0;
 
 	pr_info("page_pinner enabled\n");
+
 	pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
 
 	debugfs_create_file("longterm_pinner", 0400, pp_debugfs_root, NULL,
@@ -365,6 +477,14 @@ static int __init page_pinner_init(void)
 
 	debugfs_create_file("threshold", 0444, pp_debugfs_root, NULL,
 			    &pp_threshold_fops);
+
+	debugfs_create_file("alloc_contig_failed", 0400,
+			    pp_debugfs_root, NULL,
+			    &proc_alloc_contig_failed_operations);
+
+	debugfs_create_file("failure_tracking", 0444,
+			    pp_debugfs_root, NULL,
+			    &failure_tracking_fops);
 	return 0;
 }
 late_initcall(page_pinner_init)
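
For reference, a minimal userspace sketch (not part of the patch) showing how the debugfs files created in page_pinner_init() above might be exercised. The file names come from the patch; the mount point /sys/kernel/debug, running as root, and a kernel built with CONFIG_PAGE_PINNER are assumptions.

/*
 * Usage sketch (assumption, not part of the patch): dump the records
 * captured in the new page_pinner/alloc_contig_failed debugfs file.
 * Assumes debugfs is mounted at /sys/kernel/debug and CONFIG_PAGE_PINNER=y.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *tracking = "/sys/kernel/debug/page_pinner/failure_tracking";
	const char *failed = "/sys/kernel/debug/page_pinner/alloc_contig_failed";
	char buf[4096];
	ssize_t n;
	int fd;

	/* failure_tracking defaults to on (DEFINE_STATIC_KEY_TRUE); set it anyway. */
	fd = open(tracking, O_WRONLY);
	if (fd >= 0) {
		if (write(fd, "1", 1) != 1)
			perror("write failure_tracking");
		close(fd);
	}

	/* Each read() returns one captured record, newest first. */
	fd = open(failed, O_RDONLY);
	if (fd < 0) {
		perror("open alloc_contig_failed");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	close(fd);
	return 0;
}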