swap_slots.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Manage cache of swap slots to be used for and returned from
 * swap.
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * Author: Tim Chen <[email protected]>
 *
 * We allocate the swap slots from the global pool and put
 * them into local per cpu caches.  This has the advantage
 * of not needing to acquire the swap_info lock every time
 * we need a new slot.
 *
 * There is also the opportunity to simply return a slot
 * to the local cache without needing to acquire the swap_info
 * lock.  We do not reuse the returned slots directly but
 * move them back to the global pool in a batch.  This
 * allows the slots to coalesce, reducing fragmentation.
 *
 * The allocated swap entry is marked with the SWAP_HAS_CACHE
 * flag in map_count, which prevents it from being allocated
 * again from the global pool.
 *
 * The swap slots cache is protected by a mutex instead of
 * a spin lock because we can possibly sleep when searching
 * for slots with scan_swap_map.
 */

#include <linux/swap_slots.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>
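
/*
 * For reference, a minimal sketch of the per-CPU cache operated on below.
 * It mirrors the definition expected from <linux/swap_slots.h>; the field
 * list is inferred from how the fields are used in this file, so the exact
 * layout and any additional fields are assumptions:
 *
 *      struct swap_slots_cache {
 *              bool            lock_initialized;
 *              struct mutex    alloc_lock;     protects slots, nr, cur
 *              swp_entry_t     *slots;         slots available for allocation
 *              int             nr;             number of cached slots left
 *              int             cur;            index of next slot to hand out
 *              spinlock_t      free_lock;      protects slots_ret, n_ret
 *              swp_entry_t     *slots_ret;     slots returned via free_swap_slot()
 *              int             n_ret;          number of returned slots batched
 *      };
 */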
static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);

static bool swap_slot_cache_active;
bool swap_slot_cache_enabled;
static bool swap_slot_cache_initialized;
static DEFINE_MUTEX(swap_slots_cache_mutex);
/* Serialize swap slots cache enable/disable operations */
static DEFINE_MUTEX(swap_slots_cache_enable_mutex);

static void __drain_swap_slots_cache(unsigned int type);

#define use_swap_slot_cache (swap_slot_cache_active && swap_slot_cache_enabled)
#define SLOTS_CACHE 0x1
#define SLOTS_CACHE_RET 0x2

static void deactivate_swap_slots_cache(void)
{
        mutex_lock(&swap_slots_cache_mutex);
        swap_slot_cache_active = false;
        __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
        mutex_unlock(&swap_slots_cache_mutex);
}

static void reactivate_swap_slots_cache(void)
{
        mutex_lock(&swap_slots_cache_mutex);
        swap_slot_cache_active = true;
        mutex_unlock(&swap_slots_cache_mutex);
}

/* Must not be called with cpu hot plug lock */
void disable_swap_slots_cache_lock(void)
{
        mutex_lock(&swap_slots_cache_enable_mutex);
        swap_slot_cache_enabled = false;
        if (swap_slot_cache_initialized) {
                /* serialize with cpu hotplug operations */
                cpus_read_lock();
                __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
                cpus_read_unlock();
        }
}

static void __reenable_swap_slots_cache(void)
{
        swap_slot_cache_enabled = has_usable_swap();
}

void reenable_swap_slots_cache_unlock(void)
{
        __reenable_swap_slots_cache();
        mutex_unlock(&swap_slots_cache_enable_mutex);
}
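
/*
 * Illustrative only: the two functions above form a lock/unlock pair around
 * operations that change the set of usable swap devices (the real caller is
 * the swapoff path in mm/swapfile.c).  A caller is assumed to look roughly
 * like the sketch below; the helper name example_remove_swap_device() and
 * its body are hypothetical, not kernel code.
 *
 *      static void example_remove_swap_device(struct swap_info_struct *si)
 *      {
 *              disable_swap_slots_cache_lock();        drain the per-cpu
 *                                                      caches, block refills
 *
 *              ... tear down the swap device ...
 *
 *              reenable_swap_slots_cache_unlock();     re-enable the cache if
 *                                                      usable swap remains
 *      }
 */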
static bool check_cache_active(void)
{
        long pages;

        if (!swap_slot_cache_enabled)
                return false;

        pages = get_nr_swap_pages();
        if (!swap_slot_cache_active) {
                if (pages > num_online_cpus() *
                    THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE)
                        reactivate_swap_slots_cache();
                goto out;
        }

        /* if the global pool of free swap slots runs too low, deactivate the cache */
        if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE)
                deactivate_swap_slots_cache();
out:
        return swap_slot_cache_active;
}
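
/*
 * For orientation: the activation and deactivation thresholds used above come
 * from <linux/swap_slots.h>.  They are assumed to be
 * 5 * SWAP_SLOTS_CACHE_SIZE and 2 * SWAP_SLOTS_CACHE_SIZE respectively, with
 * SWAP_SLOTS_CACHE_SIZE equal to SWAP_BATCH (64 slots).  Under that
 * assumption, a machine with 8 online cpus would activate the cache only
 * above 8 * 320 = 2560 free slots and deactivate it below 8 * 128 = 1024;
 * the gap between the two thresholds provides hysteresis so the cache does
 * not flip-flop as free swap space hovers near a single cutoff.
 */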
static int alloc_swap_slot_cache(unsigned int cpu)
{
        struct swap_slots_cache *cache;
        swp_entry_t *slots, *slots_ret;

        /*
         * Do the allocations outside swap_slots_cache_mutex, as
         * kvcalloc() could trigger reclaim and folio_alloc_swap(),
         * which can take swap_slots_cache_mutex.
         */
        slots = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
                         GFP_KERNEL);
        if (!slots)
                return -ENOMEM;

        slots_ret = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
                             GFP_KERNEL);
        if (!slots_ret) {
                kvfree(slots);
                return -ENOMEM;
        }

        mutex_lock(&swap_slots_cache_mutex);
        cache = &per_cpu(swp_slots, cpu);
        if (cache->slots || cache->slots_ret) {
                /* cache already allocated */
                mutex_unlock(&swap_slots_cache_mutex);

                kvfree(slots);
                kvfree(slots_ret);

                return 0;
        }

        if (!cache->lock_initialized) {
                mutex_init(&cache->alloc_lock);
                spin_lock_init(&cache->free_lock);
                cache->lock_initialized = true;
        }
        cache->nr = 0;
        cache->cur = 0;
        cache->n_ret = 0;
        /*
         * We initialized alloc_lock and free_lock earlier.  We use
         * !cache->slots or !cache->slots_ret to know if it is safe to
         * acquire the corresponding lock and use the cache.  The memory
         * barrier below ensures the assumption.
         */
        mb();
        cache->slots = slots;
        cache->slots_ret = slots_ret;
        mutex_unlock(&swap_slots_cache_mutex);
        return 0;
}
static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
                                  bool free_slots)
{
        struct swap_slots_cache *cache;
        swp_entry_t *slots = NULL;

        cache = &per_cpu(swp_slots, cpu);
        if ((type & SLOTS_CACHE) && cache->slots) {
                mutex_lock(&cache->alloc_lock);
                swapcache_free_entries(cache->slots + cache->cur, cache->nr);
                cache->cur = 0;
                cache->nr = 0;
                if (free_slots && cache->slots) {
                        kvfree(cache->slots);
                        cache->slots = NULL;
                }
                mutex_unlock(&cache->alloc_lock);
        }
        if ((type & SLOTS_CACHE_RET) && cache->slots_ret) {
                spin_lock_irq(&cache->free_lock);
                swapcache_free_entries(cache->slots_ret, cache->n_ret);
                cache->n_ret = 0;
                if (free_slots && cache->slots_ret) {
                        slots = cache->slots_ret;
                        cache->slots_ret = NULL;
                }
                spin_unlock_irq(&cache->free_lock);
                kvfree(slots);
        }
}
static void __drain_swap_slots_cache(unsigned int type)
{
        unsigned int cpu;

        /*
         * This function is called during
         *      1) swapoff, when we have to make sure no
         *         left over slots are in cache when we remove
         *         a swap device;
         *      2) disabling of swap slot cache, when we run low
         *         on swap slots when allocating memory and need
         *         to return swap slots to global pool.
         *
         * We cannot acquire cpu hot plug lock here as
         * this function can be invoked in the cpu
         * hot plug path:
         * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
         *        -> memory allocation -> direct reclaim -> folio_alloc_swap
         *        -> drain_swap_slots_cache
         *
         * Hence the loop over the currently online cpus below could miss a
         * cpu that is being brought online but is not yet marked as online.
         * That is okay, as we do not schedule and run anything on a
         * cpu before it has been marked online.  Hence, we will not
         * fill any swap slots in the slots cache of such a cpu.
         * There are no slots on such a cpu that need to be drained.
         */
        for_each_online_cpu(cpu)
                drain_slots_cache_cpu(cpu, type, false);
}
static int free_slot_cache(unsigned int cpu)
{
        mutex_lock(&swap_slots_cache_mutex);
        drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true);
        mutex_unlock(&swap_slots_cache_mutex);
        return 0;
}

void enable_swap_slots_cache(void)
{
        mutex_lock(&swap_slots_cache_enable_mutex);
        if (!swap_slot_cache_initialized) {
                int ret;

                ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache",
                                        alloc_swap_slot_cache, free_slot_cache);
                if (WARN_ONCE(ret < 0, "Cache allocation failed (%s), operating "
                                       "without swap slots cache.\n", __func__))
                        goto out_unlock;

                swap_slot_cache_initialized = true;
        }

        __reenable_swap_slots_cache();
out_unlock:
        mutex_unlock(&swap_slots_cache_enable_mutex);
}
/* called with swap slot cache's alloc lock held */
static int refill_swap_slots_cache(struct swap_slots_cache *cache)
{
        if (!use_swap_slot_cache)
                return 0;

        cache->cur = 0;
        if (swap_slot_cache_active)
                cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
                                           cache->slots, 1);

        return cache->nr;
}
void free_swap_slot(swp_entry_t entry)
{
        struct swap_slots_cache *cache;

        cache = raw_cpu_ptr(&swp_slots);
        if (likely(use_swap_slot_cache && cache->slots_ret)) {
                spin_lock_irq(&cache->free_lock);
                /* Swap slots cache may be deactivated before acquiring lock */
                if (!use_swap_slot_cache || !cache->slots_ret) {
                        spin_unlock_irq(&cache->free_lock);
                        goto direct_free;
                }
                if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) {
                        /*
                         * Return slots to global pool.
                         * The current swap_map value is SWAP_HAS_CACHE.
                         * Set it to 0 to indicate it is available for
                         * allocation in global pool.
                         */
                        swapcache_free_entries(cache->slots_ret, cache->n_ret);
                        cache->n_ret = 0;
                }
                cache->slots_ret[cache->n_ret++] = entry;
                spin_unlock_irq(&cache->free_lock);
        } else {
direct_free:
                swapcache_free_entries(&entry, 1);
        }
}
swp_entry_t folio_alloc_swap(struct folio *folio)
{
        swp_entry_t entry;
        struct swap_slots_cache *cache;

        entry.val = 0;

        if (folio_test_large(folio)) {
                if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported())
                        get_swap_pages(1, &entry, folio_nr_pages(folio));
                goto out;
        }

        /*
         * Preemption is allowed here, because we may sleep
         * in refill_swap_slots_cache().  But it is safe, because
         * accesses to the per-CPU data structure are protected by the
         * mutex cache->alloc_lock.
         *
         * The alloc path here does not touch cache->slots_ret
         * so cache->free_lock is not taken.
         */
        cache = raw_cpu_ptr(&swp_slots);

        if (likely(check_cache_active() && cache->slots)) {
                mutex_lock(&cache->alloc_lock);
                if (cache->slots) {
repeat:
                        if (cache->nr) {
                                entry = cache->slots[cache->cur];
                                cache->slots[cache->cur++].val = 0;
                                cache->nr--;
                        } else if (refill_swap_slots_cache(cache)) {
                                goto repeat;
                        }
                }
                mutex_unlock(&cache->alloc_lock);
                if (entry.val)
                        goto out;
        }

        get_swap_pages(1, &entry, 1);
out:
        if (mem_cgroup_try_charge_swap(folio, entry)) {
                put_swap_folio(folio, entry);
                entry.val = 0;
        }
        return entry;
}
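
/*
 * For illustration only: a minimal sketch of how the two public entry points
 * pair up from a caller's point of view.  The helper name
 * example_swap_out_folio() and its body are hypothetical; the real
 * allocation-side caller is add_to_swap() in mm/swap_state.c, and returned
 * entries normally reach free_swap_slot() through the entry-freeing paths in
 * mm/swapfile.c.
 *
 *      static bool example_swap_out_folio(struct folio *folio)
 *      {
 *              swp_entry_t entry;
 *
 *              entry = folio_alloc_swap(folio);        take a slot, ideally
 *                                                      from this CPU's cache
 *              if (!entry.val)
 *                      return false;                   no swap space, or the
 *                                                      memcg swap charge failed
 *
 *              ... add the folio to the swap cache and write it out ...
 *
 *              When the last reference to the entry is later dropped,
 *              free_swap_slot(entry) batches it in slots_ret and the batch is
 *              handed back to the global pool once SWAP_SLOTS_CACHE_SIZE
 *              entries have accumulated.
 *
 *              return true;
 *      }
 */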