quarantine.c

// SPDX-License-Identifier: GPL-2.0
/*
 * KASAN quarantine.
 *
 * Author: Alexander Potapenko <[email protected]>
 * Copyright (C) 2016 Google, Inc.
 *
 * Based on code by Dmitry Chernenkov.
 */

#include <linux/gfp.h>
#include <linux/hash.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/shrinker.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/cpuhotplug.h>

#include "../slab.h"
#include "kasan.h"

/* Data structure and operations for quarantine queues. */

/*
 * Each queue is a single-linked list, which also stores the total size of
 * objects inside of it.
 */
struct qlist_head {
	struct qlist_node *head;
	struct qlist_node *tail;
	size_t bytes;
	bool offline;
};

#define QLIST_INIT { NULL, NULL, 0 }

static bool qlist_empty(struct qlist_head *q)
{
	return !q->head;
}

static void qlist_init(struct qlist_head *q)
{
	q->head = q->tail = NULL;
	q->bytes = 0;
}
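
/* Append @qlink of @size bytes to the tail of @q and update the byte count. */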
static void qlist_put(struct qlist_head *q, struct qlist_node *qlink,
		size_t size)
{
	if (unlikely(qlist_empty(q)))
		q->head = qlink;
	else
		q->tail->next = qlink;
	q->tail = qlink;
	qlink->next = NULL;
	q->bytes += size;
}

static void qlist_move_all(struct qlist_head *from, struct qlist_head *to)
{
	if (unlikely(qlist_empty(from)))
		return;

	if (qlist_empty(to)) {
		*to = *from;
		qlist_init(from);
		return;
	}

	to->tail->next = from->head;
	to->tail = from->tail;
	to->bytes += from->bytes;

	qlist_init(from);
}
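
/*
 * Each per-CPU queue caches up to 1 MB of objects before it spills into the
 * global quarantine, which is split into at least 1024 batches (or four per
 * possible CPU, whichever is larger).
 */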
#define QUARANTINE_PERCPU_SIZE (1 << 20)

#define QUARANTINE_BATCHES \
	(1024 > 4 * CONFIG_NR_CPUS ? 1024 : 4 * CONFIG_NR_CPUS)

/*
 * The object quarantine consists of per-cpu queues and a global queue,
 * guarded by quarantine_lock.
 */
static DEFINE_PER_CPU(struct qlist_head, cpu_quarantine);

/* Round-robin FIFO array of batches. */
static struct qlist_head global_quarantine[QUARANTINE_BATCHES];
static int quarantine_head;
static int quarantine_tail;
/* Total size of all objects in global_quarantine across all batches. */
static unsigned long quarantine_size;
static DEFINE_RAW_SPINLOCK(quarantine_lock);
DEFINE_STATIC_SRCU(remove_cache_srcu);

#ifdef CONFIG_PREEMPT_RT
struct cpu_shrink_qlist {
	raw_spinlock_t lock;
	struct qlist_head qlist;
};

static DEFINE_PER_CPU(struct cpu_shrink_qlist, shrink_qlist) = {
	.lock = __RAW_SPIN_LOCK_UNLOCKED(shrink_qlist.lock),
};
#endif

/* Maximum size of the global queue. */
static unsigned long quarantine_max_size;

/*
 * Target size of a batch in global_quarantine.
 * Usually equal to QUARANTINE_PERCPU_SIZE unless we have too much RAM.
 */
static unsigned long quarantine_batch_size;

/*
 * The fraction of physical memory the quarantine is allowed to occupy.
 * Quarantine doesn't support memory shrinker with SLAB allocator, so we keep
 * the ratio low to avoid OOM.
 */
#define QUARANTINE_FRACTION 32
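
/*
 * For example, a machine with 4 GB of RAM caps the global quarantine at
 * 4 GB / 32 = 128 MB, minus 1 MB per online CPU for the per-cpu queues
 * (see the limit computation in kasan_quarantine_reduce() below).
 */

/*
 * A qlist_node is embedded in the object's kasan_free_meta, so the two
 * helpers below translate between a quarantine link and the object (and the
 * cache) that contains it.
 */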
static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink)
{
	return virt_to_slab(qlink)->slab_cache;
}

static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache)
{
	struct kasan_free_meta *free_info =
		container_of(qlink, struct kasan_free_meta,
			     quarantine_link);

	return ((void *)free_info) - cache->kasan_info.free_meta_offset;
}
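
/*
 * Release one quarantined object back to the slab allocator, zeroing the
 * in-object free metadata first when init_on_free expects clean memory, and
 * resetting the shadow byte so the stale free track is not reported.
 */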
static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
{
	void *object = qlink_to_object(qlink, cache);
	struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);
	unsigned long flags;

	if (IS_ENABLED(CONFIG_SLAB))
		local_irq_save(flags);

	/*
	 * If init_on_free is enabled and KASAN's free metadata is stored in
	 * the object, zero the metadata. Otherwise, the object's memory will
	 * not be properly zeroed, as KASAN saves the metadata after the slab
	 * allocator zeroes the object.
	 */
	if (slab_want_init_on_free(cache) &&
	    cache->kasan_info.free_meta_offset == 0)
		memzero_explicit(meta, sizeof(*meta));

	/*
	 * As the object now gets freed from the quarantine, assume that its
	 * free track is no longer valid.
	 */
	*(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE;

	___cache_free(cache, object, _THIS_IP_);

	if (IS_ENABLED(CONFIG_SLAB))
		local_irq_restore(flags);
}
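
/*
 * Free every object on @q. If @cache is NULL, the owning cache of each
 * object is looked up from its slab page instead.
 */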
static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache)
{
	struct qlist_node *qlink;

	if (unlikely(qlist_empty(q)))
		return;

	qlink = q->head;
	while (qlink) {
		struct kmem_cache *obj_cache =
			cache ? cache : qlink_to_cache(qlink);
		struct qlist_node *next = qlink->next;

		qlink_free(qlink, obj_cache);
		qlink = next;
	}
	qlist_init(q);
}
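
/*
 * Put a just-freed object into the quarantine instead of returning it to the
 * allocator. Returns true if the object was queued; false if it has no free
 * metadata or the per-cpu queue is offline, in which case the caller frees
 * the object immediately.
 */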
bool kasan_quarantine_put(struct kmem_cache *cache, void *object)
{
	unsigned long flags;
	struct qlist_head *q;
	struct qlist_head temp = QLIST_INIT;
	struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);

	/*
	 * If there's no metadata for this object, don't put it into
	 * quarantine.
	 */
	if (!meta)
		return false;

	/*
	 * Note: irq must be disabled until after we move the batch to the
	 * global quarantine. Otherwise kasan_quarantine_remove_cache() can
	 * miss some objects belonging to the cache if they are in our local
	 * temp list. kasan_quarantine_remove_cache() executes on_each_cpu()
	 * at the beginning which ensures that it either sees the objects in
	 * per-cpu lists or in the global quarantine.
	 */
	local_irq_save(flags);

	q = this_cpu_ptr(&cpu_quarantine);
	if (q->offline) {
		local_irq_restore(flags);
		return false;
	}
	qlist_put(q, &meta->quarantine_link, cache->size);
	if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
		qlist_move_all(q, &temp);

		raw_spin_lock(&quarantine_lock);
		WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
		qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
		if (global_quarantine[quarantine_tail].bytes >=
				READ_ONCE(quarantine_batch_size)) {
			int new_tail;

			new_tail = quarantine_tail + 1;
			if (new_tail == QUARANTINE_BATCHES)
				new_tail = 0;
			if (new_tail != quarantine_head)
				quarantine_tail = new_tail;
		}
		raw_spin_unlock(&quarantine_lock);
	}

	local_irq_restore(flags);

	return true;
}
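
/*
 * Shrink the global quarantine when it exceeds its size limit: recompute the
 * limit (memory may have been hot-plugged), then detach one batch from the
 * head of the FIFO and free it outside of quarantine_lock.
 */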
void kasan_quarantine_reduce(void)
{
	size_t total_size, new_quarantine_size, percpu_quarantines;
	unsigned long flags;
	int srcu_idx;
	struct qlist_head to_free = QLIST_INIT;

	if (likely(READ_ONCE(quarantine_size) <=
		   READ_ONCE(quarantine_max_size)))
		return;

	/*
	 * srcu critical section ensures that kasan_quarantine_remove_cache()
	 * will not miss objects belonging to the cache while they are in our
	 * local to_free list. srcu is chosen because (1) it gives us private
	 * grace period domain that does not interfere with anything else,
	 * and (2) it allows synchronize_srcu() to return without waiting
	 * if there are no pending read critical sections (which is the
	 * expected case).
	 */
	srcu_idx = srcu_read_lock(&remove_cache_srcu);
	raw_spin_lock_irqsave(&quarantine_lock, flags);

	/*
	 * Update quarantine size in case of hotplug. Allocate a fraction of
	 * the installed memory to quarantine minus per-cpu queue limits.
	 */
	total_size = (totalram_pages() << PAGE_SHIFT) /
		QUARANTINE_FRACTION;
	percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
	new_quarantine_size = (total_size < percpu_quarantines) ?
		0 : total_size - percpu_quarantines;
	WRITE_ONCE(quarantine_max_size, new_quarantine_size);
	/* Aim at consuming at most 1/2 of slots in quarantine. */
	WRITE_ONCE(quarantine_batch_size, max((size_t)QUARANTINE_PERCPU_SIZE,
		2 * total_size / QUARANTINE_BATCHES));

	if (likely(quarantine_size > quarantine_max_size)) {
		qlist_move_all(&global_quarantine[quarantine_head], &to_free);
		WRITE_ONCE(quarantine_size, quarantine_size - to_free.bytes);
		quarantine_head++;
		if (quarantine_head == QUARANTINE_BATCHES)
			quarantine_head = 0;
	}

	raw_spin_unlock_irqrestore(&quarantine_lock, flags);

	qlist_free_all(&to_free, NULL);
	srcu_read_unlock(&remove_cache_srcu, srcu_idx);
}
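
/*
 * Walk @from and move every node whose object belongs to @cache onto @to,
 * re-queueing all other nodes back onto @from in their original order.
 */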
static void qlist_move_cache(struct qlist_head *from,
				   struct qlist_head *to,
				   struct kmem_cache *cache)
{
	struct qlist_node *curr;

	if (unlikely(qlist_empty(from)))
		return;

	curr = from->head;
	qlist_init(from);
	while (curr) {
		struct qlist_node *next = curr->next;
		struct kmem_cache *obj_cache = qlink_to_cache(curr);

		if (obj_cache == cache)
			qlist_put(to, curr, obj_cache->size);
		else
			qlist_put(from, curr, obj_cache->size);

		curr = next;
	}
}

#ifndef CONFIG_PREEMPT_RT
static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
{
	struct kmem_cache *cache = arg;
	struct qlist_head to_free = QLIST_INIT;

	qlist_move_cache(q, &to_free, cache);
	qlist_free_all(&to_free, cache);
}
#else
static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
{
	struct kmem_cache *cache = arg;
	unsigned long flags;
	struct cpu_shrink_qlist *sq;

	sq = this_cpu_ptr(&shrink_qlist);
	raw_spin_lock_irqsave(&sq->lock, flags);
	qlist_move_cache(q, &sq->qlist, cache);
	raw_spin_unlock_irqrestore(&sq->lock, flags);
}
#endif
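
/*
 * Runs on each CPU via on_each_cpu(), i.e. in IPI context with interrupts
 * disabled. On PREEMPT_RT the matching objects are only collected into a
 * per-cpu list here and freed later by kasan_quarantine_remove_cache().
 */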
static void per_cpu_remove_cache(void *arg)
{
	struct qlist_head *q;

	q = this_cpu_ptr(&cpu_quarantine);
	/*
	 * Ensure the ordering between the writing to q->offline and
	 * per_cpu_remove_cache. Prevent cpu_quarantine from being corrupted
	 * by interrupt.
	 */
	if (READ_ONCE(q->offline))
		return;
	__per_cpu_remove_cache(q, arg);
}

/* Free all quarantined objects belonging to cache. */
void kasan_quarantine_remove_cache(struct kmem_cache *cache)
{
	unsigned long flags, i;
	struct qlist_head to_free = QLIST_INIT;

	/*
	 * Must be careful to not miss any objects that are being moved from
	 * per-cpu list to the global quarantine in kasan_quarantine_put(),
	 * nor objects being freed in kasan_quarantine_reduce(). on_each_cpu()
	 * achieves the first goal, while synchronize_srcu() achieves the
	 * second.
	 */
	on_each_cpu(per_cpu_remove_cache, cache, 1);

#ifdef CONFIG_PREEMPT_RT
	{
		int cpu;
		struct cpu_shrink_qlist *sq;

		for_each_online_cpu(cpu) {
			sq = per_cpu_ptr(&shrink_qlist, cpu);
			raw_spin_lock_irqsave(&sq->lock, flags);
			qlist_move_cache(&sq->qlist, &to_free, cache);
			raw_spin_unlock_irqrestore(&sq->lock, flags);
		}
		qlist_free_all(&to_free, cache);
	}
#endif

	raw_spin_lock_irqsave(&quarantine_lock, flags);
	for (i = 0; i < QUARANTINE_BATCHES; i++) {
		if (qlist_empty(&global_quarantine[i]))
			continue;
		qlist_move_cache(&global_quarantine[i], &to_free, cache);
		/* Scanning whole quarantine can take a while. */
		raw_spin_unlock_irqrestore(&quarantine_lock, flags);
		cond_resched();
		raw_spin_lock_irqsave(&quarantine_lock, flags);
	}
	raw_spin_unlock_irqrestore(&quarantine_lock, flags);

	qlist_free_all(&to_free, cache);

	synchronize_srcu(&remove_cache_srcu);
}
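
/*
 * CPU hotplug callbacks: mark the per-cpu queue offline before draining it
 * on teardown, so that a racing kasan_quarantine_put() from an interrupt
 * cannot re-populate the queue while it is being freed.
 */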
static int kasan_cpu_online(unsigned int cpu)
{
	this_cpu_ptr(&cpu_quarantine)->offline = false;
	return 0;
}

static int kasan_cpu_offline(unsigned int cpu)
{
	struct qlist_head *q;

	q = this_cpu_ptr(&cpu_quarantine);
	/* Ensure the ordering between the writing to q->offline and
	 * qlist_free_all. Otherwise, cpu_quarantine may be corrupted
	 * by interrupt.
	 */
	WRITE_ONCE(q->offline, true);
	barrier();
	qlist_free_all(q, NULL);
	return 0;
}

static int __init kasan_cpu_quarantine_init(void)
{
	int ret = 0;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/kasan:online",
				kasan_cpu_online, kasan_cpu_offline);
	if (ret < 0)
		pr_err("kasan cpu quarantine register failed [%d]\n", ret);
	return ret;
}
late_initcall(kasan_cpu_quarantine_init);