123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434 |
- // SPDX-License-Identifier: GPL-2.0
- #include <linux/debugfs.h>
- #include <linux/mm.h>
- #include <linux/slab.h>
- #include <linux/uaccess.h>
- #include <linux/memblock.h>
- #include <linux/stacktrace.h>
- #include <linux/page_pinner.h>
- #include <linux/jump_label.h>
- #include <linux/migrate.h>
- #include <linux/stackdepot.h>
- #include <linux/seq_file.h>
- #include <linux/sched/clock.h>
- #include "internal.h"
- #define PAGE_PINNER_STACK_DEPTH 16
- static unsigned long pp_buf_size = 4096;
- struct page_pinner {
- depot_stack_handle_t handle;
- u64 ts_usec;
- atomic_t count;
- };
- enum pp_state {
- PP_PUT,
- PP_FREE,
- PP_FAIL_DETECTED,
- };
- struct captured_pinner {
- depot_stack_handle_t handle;
- union {
- u64 ts_usec;
- u64 elapsed;
- };
- /* struct page fields */
- unsigned long pfn;
- int count;
- int mapcount;
- struct address_space *mapping;
- unsigned long flags;
- enum pp_state state;
- };
- struct page_pinner_buffer {
- spinlock_t lock;
- unsigned long index;
- struct captured_pinner *buffer;
- };
- /* alloc_contig failed pinner */
- static struct page_pinner_buffer pp_buffer;
- static bool page_pinner_enabled;
- DEFINE_STATIC_KEY_FALSE(page_pinner_inited);
- EXPORT_SYMBOL_GPL(page_pinner_inited);
- DEFINE_STATIC_KEY_TRUE(failure_tracking);
- static depot_stack_handle_t failure_handle;
- static int __init early_page_pinner_param(char *buf)
- {
- page_pinner_enabled = true;
- return 0;
- }
- early_param("page_pinner", early_page_pinner_param);
- static bool need_page_pinner(void)
- {
- return page_pinner_enabled;
- }
- static noinline void register_failure_stack(void)
- {
- unsigned long entries[4];
- unsigned int nr_entries;
- nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
- failure_handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
- }
- static void init_page_pinner(void)
- {
- if (!page_pinner_enabled)
- return;
- pp_buffer.buffer = kvmalloc_array(pp_buf_size, sizeof(*pp_buffer.buffer),
- GFP_KERNEL);
- if (!pp_buffer.buffer) {
- pr_info("page_pinner disabled due to failure of buffer allocation\n");
- return;
- }
- spin_lock_init(&pp_buffer.lock);
- pp_buffer.index = 0;
- register_failure_stack();
- static_branch_enable(&page_pinner_inited);
- }
- struct page_ext_operations page_pinner_ops = {
- .size = sizeof(struct page_pinner),
- .need = need_page_pinner,
- .init = init_page_pinner,
- };
- static inline struct page_pinner *get_page_pinner(struct page_ext *page_ext)
- {
- return (void *)page_ext + page_pinner_ops.offset;
- }
- static noinline depot_stack_handle_t save_stack(gfp_t flags)
- {
- unsigned long entries[PAGE_PINNER_STACK_DEPTH];
- depot_stack_handle_t handle;
- unsigned int nr_entries;
- nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
- handle = stack_depot_save(entries, nr_entries, flags);
- if (!handle)
- handle = failure_handle;
- return handle;
- }
- static void capture_page_state(struct page *page,
- struct captured_pinner *record)
- {
- record->flags = page->flags;
- record->mapping = page_mapping(page);
- record->pfn = page_to_pfn(page);
- record->count = page_count(page);
- record->mapcount = page_mapcount(page);
- }
- static void add_record(struct page_pinner_buffer *pp_buf,
- struct captured_pinner *record)
- {
- unsigned long flags;
- unsigned int idx;
- spin_lock_irqsave(&pp_buf->lock, flags);
- idx = pp_buf->index++;
- pp_buf->index %= pp_buf_size;
- pp_buf->buffer[idx] = *record;
- spin_unlock_irqrestore(&pp_buf->lock, flags);
- }
- void __free_page_pinner(struct page *page, unsigned int order)
- {
- struct page_pinner *page_pinner;
- struct page_ext *page_ext;
- int i;
- /* free_page could be called before buffer is initialized */
- if (!pp_buffer.buffer)
- return;
- page_ext = page_ext_get(page);
- if (unlikely(!page_ext))
- return;
- for (i = 0; i < (1 << order); i++) {
- struct captured_pinner record;
- if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
- continue;
- page_pinner = get_page_pinner(page_ext);
- record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
- record.ts_usec = (u64)ktime_to_us(ktime_get_boottime());
- record.state = PP_FREE;
- capture_page_state(page, &record);
- add_record(&pp_buffer, &record);
- atomic_set(&page_pinner->count, 0);
- page_pinner->ts_usec = 0;
- clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
- page_ext = page_ext_next(page_ext);
- }
- page_ext_put(page_ext);
- }
- static ssize_t
- print_page_pinner(char __user *buf, size_t count, struct captured_pinner *record)
- {
- int ret;
- unsigned long *entries;
- unsigned int nr_entries;
- char *kbuf;
- count = min_t(size_t, count, PAGE_SIZE);
- kbuf = kmalloc(count, GFP_KERNEL);
- if (!kbuf)
- return -ENOMEM;
- if (record->state == PP_PUT) {
- ret = snprintf(kbuf, count, "At least, pinned for %llu us\n",
- record->elapsed);
- } else {
- u64 ts_usec = record->ts_usec;
- unsigned long rem_usec = do_div(ts_usec, 1000000);
- ret = snprintf(kbuf, count,
- "%s [%5lu.%06lu]\n",
- record->state == PP_FREE ? "Freed at" :
- "Failure detected at",
- (unsigned long)ts_usec, rem_usec);
- }
- if (ret >= count)
- goto err;
- /* Print information relevant to grouping pages by mobility */
- ret += snprintf(kbuf + ret, count - ret,
- "PFN 0x%lx Block %lu count %d mapcount %d mapping %pS Flags %#lx(%pGp)\n",
- record->pfn,
- record->pfn >> pageblock_order,
- record->count, record->mapcount,
- record->mapping,
- record->flags, &record->flags);
- if (ret >= count)
- goto err;
- nr_entries = stack_depot_fetch(record->handle, &entries);
- ret += stack_trace_snprint(kbuf + ret, count - ret, entries,
- nr_entries, 0);
- if (ret >= count)
- goto err;
- ret += snprintf(kbuf + ret, count - ret, "\n");
- if (ret >= count)
- goto err;
- if (copy_to_user(buf, kbuf, ret))
- ret = -EFAULT;
- kfree(kbuf);
- return ret;
- err:
- kfree(kbuf);
- return -ENOMEM;
- }
- void __page_pinner_failure_detect(struct page *page)
- {
- struct page_ext *page_ext;
- struct page_pinner *page_pinner;
- struct captured_pinner record;
- u64 now;
- if (!static_branch_unlikely(&failure_tracking))
- return;
- page_ext = page_ext_get(page);
- if (unlikely(!page_ext))
- return;
- if (test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags)) {
- page_ext_put(page_ext);
- return;
- }
- now = (u64)ktime_to_us(ktime_get_boottime());
- page_pinner = get_page_pinner(page_ext);
- if (!page_pinner->ts_usec)
- page_pinner->ts_usec = now;
- set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
- record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
- record.ts_usec = now;
- record.state = PP_FAIL_DETECTED;
- capture_page_state(page, &record);
- add_record(&pp_buffer, &record);
- page_ext_put(page_ext);
- }
- EXPORT_SYMBOL_GPL(__page_pinner_failure_detect);
- void __page_pinner_put_page(struct page *page)
- {
- struct page_ext *page_ext;
- struct page_pinner *page_pinner;
- struct captured_pinner record;
- u64 now, ts_usec;
- if (!static_branch_unlikely(&failure_tracking))
- return;
- page_ext = page_ext_get(page);
- if (unlikely(!page_ext))
- return;
- if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags)) {
- page_ext_put(page_ext);
- return;
- }
- page_pinner = get_page_pinner(page_ext);
- record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
- now = (u64)ktime_to_us(ktime_get_boottime());
- ts_usec = page_pinner->ts_usec;
- if (now > ts_usec)
- record.elapsed = now - ts_usec;
- else
- record.elapsed = 0;
- record.state = PP_PUT;
- capture_page_state(page, &record);
- add_record(&pp_buffer, &record);
- page_ext_put(page_ext);
- }
- EXPORT_SYMBOL_GPL(__page_pinner_put_page);
- static ssize_t read_buffer(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
- {
- u64 tmp;
- loff_t i, idx;
- struct captured_pinner record;
- unsigned long flags;
- if (!static_branch_unlikely(&failure_tracking))
- return -EINVAL;
- if (*ppos >= pp_buf_size)
- return 0;
- i = *ppos;
- *ppos = i + 1;
- /*
- * reading the records in the reverse order with newest one
- * being read first followed by older ones
- */
- tmp = pp_buffer.index - 1 - i + pp_buf_size;
- idx = do_div(tmp, pp_buf_size);
- spin_lock_irqsave(&pp_buffer.lock, flags);
- record = pp_buffer.buffer[idx];
- spin_unlock_irqrestore(&pp_buffer.lock, flags);
- if (!record.handle)
- return 0;
- return print_page_pinner(buf, count, &record);
- }
- static const struct file_operations proc_buffer_operations = {
- .read = read_buffer,
- };
- static int failure_tracking_set(void *data, u64 val)
- {
- bool on;
- on = (bool)val;
- if (on)
- static_branch_enable(&failure_tracking);
- else
- static_branch_disable(&failure_tracking);
- return 0;
- }
- static int failure_tracking_get(void *data, u64 *val)
- {
- *val = static_branch_unlikely(&failure_tracking);
- return 0;
- }
- DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
- failure_tracking_get,
- failure_tracking_set, "%llu\n");
- static int buffer_size_set(void *data, u64 val)
- {
- unsigned long flags;
- struct captured_pinner *new, *old;
- new = kvmalloc_array(val, sizeof(*new), GFP_KERNEL);
- if (!new)
- return -ENOMEM;
- spin_lock_irqsave(&pp_buffer.lock, flags);
- old = pp_buffer.buffer;
- pp_buffer.buffer = new;
- pp_buffer.index = 0;
- pp_buf_size = val;
- spin_unlock_irqrestore(&pp_buffer.lock, flags);
- kvfree(old);
- return 0;
- }
- static int buffer_size_get(void *data, u64 *val)
- {
- *val = pp_buf_size;
- return 0;
- }
- DEFINE_DEBUGFS_ATTRIBUTE(buffer_size_fops,
- buffer_size_get,
- buffer_size_set, "%llu\n");
- static int __init page_pinner_init(void)
- {
- struct dentry *pp_debugfs_root;
- if (!static_branch_unlikely(&page_pinner_inited))
- return 0;
- pr_info("page_pinner enabled\n");
- pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
- debugfs_create_file("buffer", 0444,
- pp_debugfs_root, NULL,
- &proc_buffer_operations);
- debugfs_create_file("failure_tracking", 0644,
- pp_debugfs_root, NULL,
- &failure_tracking_fops);
- debugfs_create_file("buffer_size", 0644,
- pp_debugfs_root, NULL,
- &buffer_size_fops);
- return 0;
- }
- late_initcall(page_pinner_init)
|