stackdepot.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic stack depot for storing stack traces.
 *
 * Some debugging tools need to save stack traces of certain events which can
 * be later presented to the user. For example, KASAN needs to save alloc and
 * free stacks for each object, but storing two stack traces per object
 * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for
 * that).
 *
 * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc
 * and free stacks repeat a lot, we save about 100x space.
 * Stacks are never removed from the depot, so we store them contiguously,
 * one after another, in contiguous memory allocations.
 *
 * Author: Alexander Potapenko <[email protected]>
 * Copyright (C) 2016 Google, Inc.
 *
 * Based on code by Dmitry Chernenkov.
 */
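
/*
 * Illustrative usage sketch (a hypothetical caller, not part of this file):
 * a debugging tool typically captures a trace with stack_trace_save(),
 * deduplicates it with stack_depot_save(), keeps only the returned 32-bit
 * handle, and prints the trace later via stack_depot_print().
 *
 *        static void example_record_event(void)
 *        {
 *                unsigned long entries[16];
 *                unsigned int nr_entries;
 *                depot_stack_handle_t handle;
 *
 *                nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
 *                handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
 *                if (handle)
 *                        stack_depot_print(handle);
 *        }
 */
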
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>
#include <linux/kasan-enabled.h>

#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)

#define STACK_ALLOC_NULL_PROTECTION_BITS 1
#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER))
#define STACK_ALLOC_ALIGN 4
#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
                                        STACK_ALLOC_ALIGN)
#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
                STACK_ALLOC_NULL_PROTECTION_BITS - \
                STACK_ALLOC_OFFSET_BITS - STACK_DEPOT_EXTRA_BITS)
#define STACK_ALLOC_SLABS_CAP 8192
#define STACK_ALLOC_MAX_SLABS \
        (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
         (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)

/* The compact structure to store the reference to stacks. */
union handle_parts {
        depot_stack_handle_t handle;
        struct {
                u32 slabindex : STACK_ALLOC_INDEX_BITS;
                u32 offset : STACK_ALLOC_OFFSET_BITS;
                u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS;
                u32 extra : STACK_DEPOT_EXTRA_BITS;
        };
};
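
/*
 * Worked example of the handle layout above (values follow from the macros;
 * assuming 4 KiB pages, i.e. PAGE_SHIFT == 12): each depot slab spans
 * 1 << (PAGE_SHIFT + STACK_ALLOC_ORDER) == 16 KiB and records are aligned to
 * 1 << STACK_ALLOC_ALIGN == 16 bytes, so an offset within a slab needs
 * STACK_ALLOC_OFFSET_BITS == 2 + 12 - 4 == 10 bits. A handle is decoded as:
 *
 *        record = stack_slabs[parts.slabindex] +
 *                 (parts.offset << STACK_ALLOC_ALIGN);
 *
 * which is exactly the calculation stack_depot_fetch() performs below.
 */
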
struct stack_record {
        struct stack_record *next;      /* Link in the hashtable */
        u32 hash;                       /* Hash in the hash table */
        u32 size;                       /* Number of frames in the stack */
        union handle_parts handle;
        unsigned long entries[];        /* Variable-sized array of entries. */
};

static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
static bool __stack_depot_early_init_passed __initdata;

static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
static int depot_index;
static int next_slab_inited;
static size_t depot_offset;
static DEFINE_RAW_SPINLOCK(depot_lock);

unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
{
        union handle_parts parts = { .handle = handle };

        return parts.extra;
}
EXPORT_SYMBOL(stack_depot_get_extra_bits);

static bool init_stack_slab(void **prealloc)
{
        if (!*prealloc)
                return false;
        /*
         * This smp_load_acquire() pairs with smp_store_release() to
         * |next_slab_inited| below and in depot_alloc_stack().
         */
        if (smp_load_acquire(&next_slab_inited))
                return true;
        if (stack_slabs[depot_index] == NULL) {
                stack_slabs[depot_index] = *prealloc;
                *prealloc = NULL;
        } else {
                /* If this is the last depot slab, do not touch the next one. */
                if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) {
                        stack_slabs[depot_index + 1] = *prealloc;
                        *prealloc = NULL;
                }
                /*
                 * This smp_store_release pairs with smp_load_acquire() from
                 * |next_slab_inited| above and in stack_depot_save().
                 */
                smp_store_release(&next_slab_inited, 1);
        }
        return true;
}

/* Allocation of a new stack in raw storage */
static struct stack_record *
depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
{
        struct stack_record *stack;
        size_t required_size = struct_size(stack, entries, size);

        required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN);

        if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) {
                if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) {
                        WARN_ONCE(1, "Stack depot reached limit capacity");
                        return NULL;
                }
                depot_index++;
                depot_offset = 0;
                /*
                 * smp_store_release() here pairs with smp_load_acquire() from
                 * |next_slab_inited| in stack_depot_save() and
                 * init_stack_slab().
                 */
                if (depot_index + 1 < STACK_ALLOC_MAX_SLABS)
                        smp_store_release(&next_slab_inited, 0);
        }
        init_stack_slab(prealloc);
        if (stack_slabs[depot_index] == NULL)
                return NULL;

        stack = stack_slabs[depot_index] + depot_offset;
        stack->hash = hash;
        stack->size = size;
        stack->handle.slabindex = depot_index;
        stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
        stack->handle.valid = 1;
        stack->handle.extra = 0;
        memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
        depot_offset += required_size;

        return stack;
}

/* one hash table bucket entry per 16kB of memory */
#define STACK_HASH_SCALE 14
/* limited between 4k and 1M buckets */
#define STACK_HASH_ORDER_MIN 12
#define STACK_HASH_ORDER_MAX 20
#define STACK_HASH_SEED 0x9747b28c
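
/*
 * Worked example of the scaling above (illustrative machine, assumed values):
 * with 4 KiB pages (PAGE_SHIFT == 12) and roughly 8 GiB of free buffer pages,
 * nr_free_buffer_pages() is about 2^21, so stack_depot_init() below computes
 * 2^21 >> (STACK_HASH_SCALE - PAGE_SHIFT) == 2^19 buckets, i.e. one bucket
 * per 16 KiB of memory, and then clamps the result to the
 * [2^STACK_HASH_ORDER_MIN, 2^STACK_HASH_ORDER_MAX] range.
 */
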
static unsigned int stack_hash_order;
static unsigned int stack_hash_mask;

static bool stack_depot_disable;
static struct stack_record **stack_table;

static int __init is_stack_depot_disabled(char *str)
{
        int ret;

        ret = kstrtobool(str, &stack_depot_disable);
        if (!ret && stack_depot_disable) {
                pr_info("Stack Depot is disabled\n");
                stack_table = NULL;
        }
        return 0;
}
early_param("stack_depot_disable", is_stack_depot_disabled);

void __init stack_depot_want_early_init(void)
{
        /* Too late to request early init now */
        WARN_ON(__stack_depot_early_init_passed);

        __stack_depot_want_early_init = true;
}

int __init stack_depot_early_init(void)
{
        unsigned long entries = 0;

        /* This is supposed to be called only once, from mm_init() */
        if (WARN_ON(__stack_depot_early_init_passed))
                return 0;

        __stack_depot_early_init_passed = true;

        if (kasan_enabled() && !stack_hash_order)
                stack_hash_order = STACK_HASH_ORDER_MAX;

        if (!__stack_depot_want_early_init || stack_depot_disable)
                return 0;

        if (stack_hash_order)
                entries = 1UL << stack_hash_order;
        stack_table = alloc_large_system_hash("stackdepot",
                                                sizeof(struct stack_record *),
                                                entries,
                                                STACK_HASH_SCALE,
                                                HASH_EARLY | HASH_ZERO,
                                                NULL,
                                                &stack_hash_mask,
                                                1UL << STACK_HASH_ORDER_MIN,
                                                1UL << STACK_HASH_ORDER_MAX);

        if (!stack_table) {
                pr_err("Stack Depot hash table allocation failed, disabling\n");
                stack_depot_disable = true;
                return -ENOMEM;
        }

        return 0;
}

int stack_depot_init(void)
{
        static DEFINE_MUTEX(stack_depot_init_mutex);
        int ret = 0;

        mutex_lock(&stack_depot_init_mutex);
        if (!stack_depot_disable && !stack_table) {
                unsigned long entries;
                int scale = STACK_HASH_SCALE;

                if (stack_hash_order) {
                        entries = 1UL << stack_hash_order;
                } else {
                        entries = nr_free_buffer_pages();
                        entries = roundup_pow_of_two(entries);

                        if (scale > PAGE_SHIFT)
                                entries >>= (scale - PAGE_SHIFT);
                        else
                                entries <<= (PAGE_SHIFT - scale);
                }

                if (entries < 1UL << STACK_HASH_ORDER_MIN)
                        entries = 1UL << STACK_HASH_ORDER_MIN;
                if (entries > 1UL << STACK_HASH_ORDER_MAX)
                        entries = 1UL << STACK_HASH_ORDER_MAX;

                pr_info("Stack Depot allocating hash table of %lu entries with kvcalloc\n",
                                entries);
                stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL);
                if (!stack_table) {
                        pr_err("Stack Depot hash table allocation failed, disabling\n");
                        stack_depot_disable = true;
                        ret = -ENOMEM;
                }
                stack_hash_mask = entries - 1;
        }
        mutex_unlock(&stack_depot_init_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(stack_depot_init);
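
/*
 * Illustrative sketch (a hypothetical caller, for clarity only): a debug
 * feature that does not request early init calls stack_depot_init() once
 * from its own init path and backs off if the hash table cannot be
 * allocated.
 *
 *        static int __init example_debug_feature_init(void)
 *        {
 *                int ret = stack_depot_init();
 *
 *                if (ret)
 *                        pr_warn("example: stack depot unavailable\n");
 *                return ret;
 *        }
 */
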
/* Calculate hash for a stack */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
{
        return jhash2((u32 *)entries,
                      array_size(size, sizeof(*entries)) / sizeof(u32),
                      STACK_HASH_SEED);
}

/* Use our own, non-instrumented version of memcmp().
 *
 * We actually don't care about the order, just the equality.
 */
static inline
int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
                        unsigned int n)
{
        for ( ; n-- ; u1++, u2++) {
                if (*u1 != *u2)
                        return 1;
        }
        return 0;
}

/* Find a stack that is equal to the one stored in entries in the hash */
static inline struct stack_record *find_stack(struct stack_record *bucket,
                                             unsigned long *entries, int size,
                                             u32 hash)
{
        struct stack_record *found;

        for (found = bucket; found; found = found->next) {
                if (found->hash == hash &&
                    found->size == size &&
                    !stackdepot_memcmp(entries, found->entries, size))
                        return found;
        }
        return NULL;
}

/**
 * stack_depot_snprint - print stack entries from a depot into a buffer
 *
 * @handle:     Stack depot handle which was returned from
 *              stack_depot_save().
 * @buf:        Pointer to the print buffer
 * @size:       Size of the print buffer
 * @spaces:     Number of leading spaces to print
 *
 * Return:      Number of bytes printed.
 */
int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
                       int spaces)
{
        unsigned long *entries;
        unsigned int nr_entries;

        nr_entries = stack_depot_fetch(handle, &entries);
        return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
                                                spaces) : 0;
}
EXPORT_SYMBOL_GPL(stack_depot_snprint);
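
/*
 * Illustrative sketch (hypothetical caller): formatting a saved trace into a
 * caller-provided buffer, e.g. for a report or debugfs path, instead of
 * printing it straight to the kernel log.
 *
 *        static void example_report(depot_stack_handle_t handle, char *buf,
 *                                   size_t len)
 *        {
 *                int n = stack_depot_snprint(handle, buf, len, 2);
 *
 *                if (!n)
 *                        pr_warn("no stack recorded for this object\n");
 *        }
 */
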
/**
 * stack_depot_print - print stack entries from a depot
 *
 * @stack:      Stack depot handle which was returned from
 *              stack_depot_save().
 */
void stack_depot_print(depot_stack_handle_t stack)
{
        unsigned long *entries;
        unsigned int nr_entries;

        nr_entries = stack_depot_fetch(stack, &entries);
        if (nr_entries > 0)
                stack_trace_print(entries, nr_entries, 0);
}
EXPORT_SYMBOL_GPL(stack_depot_print);

/**
 * stack_depot_fetch - Fetch stack entries from a depot
 *
 * @handle:     Stack depot handle which was returned from
 *              stack_depot_save().
 * @entries:    Pointer to store the entries address
 *
 * Return:      The number of trace entries for this depot.
 */
unsigned int stack_depot_fetch(depot_stack_handle_t handle,
                               unsigned long **entries)
{
        union handle_parts parts = { .handle = handle };
        void *slab;
        size_t offset = parts.offset << STACK_ALLOC_ALIGN;
        struct stack_record *stack;

        *entries = NULL;
        if (!handle)
                return 0;

        if (parts.slabindex > depot_index) {
                WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
                        parts.slabindex, depot_index, handle);
                return 0;
        }
        slab = stack_slabs[parts.slabindex];
        if (!slab)
                return 0;
        stack = slab + offset;
        *entries = stack->entries;
        return stack->size;
}
EXPORT_SYMBOL_GPL(stack_depot_fetch);
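
/*
 * Illustrative sketch (hypothetical caller): walking the raw frames of a
 * saved trace, e.g. to symbolize or filter them rather than print them.
 *
 *        static void example_walk(depot_stack_handle_t handle)
 *        {
 *                unsigned long *entries;
 *                unsigned int nr, i;
 *
 *                nr = stack_depot_fetch(handle, &entries);
 *                for (i = 0; i < nr; i++)
 *                        pr_debug("frame %u: %pS\n", i, (void *)entries[i]);
 *        }
 */
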
/**
 * __stack_depot_save - Save a stack trace from an array
 *
 * @entries:            Pointer to storage array
 * @nr_entries:         Size of the storage array
 * @extra_bits:         Flags to store in unused bits of depot_stack_handle_t
 * @alloc_flags:        Allocation gfp flags
 * @can_alloc:          Allocate stack slabs (increased chance of failure if false)
 *
 * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is
 * %true, is allowed to replenish the stack slab pool in case no space is left
 * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids
 * any allocations and will fail if no space is left to store the stack trace.
 *
 * If the stack trace in @entries is from an interrupt, only the portion up to
 * interrupt entry is saved.
 *
 * Additional opaque flags can be passed in @extra_bits, stored in the unused
 * bits of the stack handle, and retrieved using stack_depot_get_extra_bits()
 * without calling stack_depot_fetch().
 *
 * Context: Any context, but setting @can_alloc to %false is required if
 *          alloc_pages() cannot be used from the current context. Currently
 *          this is the case from contexts where neither %GFP_ATOMIC nor
 *          %GFP_NOWAIT can be used (NMI, raw_spin_lock).
 *
 * Return: The handle of the stack struct stored in depot, 0 on failure.
 */
depot_stack_handle_t __stack_depot_save(unsigned long *entries,
                                        unsigned int nr_entries,
                                        unsigned int extra_bits,
                                        gfp_t alloc_flags, bool can_alloc)
{
        struct stack_record *found = NULL, **bucket;
        union handle_parts retval = { .handle = 0 };
        struct page *page = NULL;
        void *prealloc = NULL;
        unsigned long flags;
        u32 hash;

        /*
         * If this stack trace is from an interrupt, including anything before
         * interrupt entry usually leads to unbounded stackdepot growth.
         *
         * Because use of filter_irq_stacks() is a requirement to ensure
         * stackdepot can efficiently deduplicate interrupt stacks, always
         * filter_irq_stacks() to simplify all callers' use of stackdepot.
         */
        nr_entries = filter_irq_stacks(entries, nr_entries);

        if (unlikely(nr_entries == 0) || stack_depot_disable)
                goto fast_exit;

        hash = hash_stack(entries, nr_entries);
        bucket = &stack_table[hash & stack_hash_mask];

        /*
         * Fast path: look the stack trace up without locking.
         * The smp_load_acquire() here pairs with smp_store_release() to
         * |bucket| below.
         */
        found = find_stack(smp_load_acquire(bucket), entries,
                           nr_entries, hash);
        if (found)
                goto exit;

        /*
         * Check if the current or the next stack slab need to be initialized.
         * If so, allocate the memory - we won't be able to do that under the
         * lock.
         *
         * The smp_load_acquire() here pairs with smp_store_release() to
         * |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
         */
        if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) {
                /*
                 * Zero out zone modifiers, as we don't have specific zone
                 * requirements. Keep the flags related to allocation in atomic
                 * contexts and I/O.
                 */
                alloc_flags &= ~GFP_ZONEMASK;
                alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
                alloc_flags |= __GFP_NOWARN;
                page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
                if (page)
                        prealloc = page_address(page);
        }

        raw_spin_lock_irqsave(&depot_lock, flags);

        found = find_stack(*bucket, entries, nr_entries, hash);
        if (!found) {
                struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc);

                if (new) {
                        new->next = *bucket;
                        /*
                         * This smp_store_release() pairs with
                         * smp_load_acquire() from |bucket| above.
                         */
                        smp_store_release(bucket, new);
                        found = new;
                }
        } else if (prealloc) {
                /*
                 * We didn't need to store this stack trace, but let's keep
                 * the preallocated memory for the future.
                 */
                WARN_ON(!init_stack_slab(&prealloc));
        }

        raw_spin_unlock_irqrestore(&depot_lock, flags);
exit:
        if (prealloc) {
                /* Nobody used this memory, ok to free it. */
                free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
        }
        if (found)
                retval.handle = found->handle.handle;
fast_exit:
        retval.extra = extra_bits;

        return retval.handle;
}
EXPORT_SYMBOL_GPL(__stack_depot_save);
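
/*
 * Illustrative sketch (hypothetical caller): saving from a context where
 * alloc_pages() must not be used, e.g. under a raw spinlock or in NMI, by
 * passing @can_alloc == false so the depot only reuses already-initialized
 * slabs. The GFP flags are irrelevant in that case and GFP_NOWAIT is passed
 * only as a placeholder.
 *
 *        static depot_stack_handle_t example_save_atomic(void)
 *        {
 *                unsigned long entries[8];
 *                unsigned int nr;
 *
 *                nr = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
 *                return __stack_depot_save(entries, nr, 0, GFP_NOWAIT, false);
 *        }
 */
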
/**
 * stack_depot_save - Save a stack trace from an array
 *
 * @entries:            Pointer to storage array
 * @nr_entries:         Size of the storage array
 * @alloc_flags:        Allocation gfp flags
 *
 * Context: Contexts where allocations via alloc_pages() are allowed.
 *          See __stack_depot_save() for more details.
 *
 * Return: The handle of the stack struct stored in depot, 0 on failure.
 */
depot_stack_handle_t stack_depot_save(unsigned long *entries,
                                      unsigned int nr_entries,
                                      gfp_t alloc_flags)
{
        return __stack_depot_save(entries, nr_entries, 0, alloc_flags, true);
}
EXPORT_SYMBOL_GPL(stack_depot_save);
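
/*
 * Illustrative sketch (hypothetical caller): the space-saving pattern from
 * the header comment, where an object stores only the 32-bit handles of its
 * alloc and free stacks instead of the traces themselves.
 *
 *        struct example_track {
 *                depot_stack_handle_t alloc_handle;
 *                depot_stack_handle_t free_handle;
 *        };
 *
 *        static void example_note_alloc(struct example_track *t, gfp_t gfp)
 *        {
 *                unsigned long entries[16];
 *                unsigned int nr;
 *
 *                nr = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
 *                t->alloc_handle = stack_depot_save(entries, nr, gfp);
 *        }
 */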