zswap.c 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * zswap.c - zswap driver file
  4. *
  5. * zswap is a backend for frontswap that takes pages that are in the process
  6. * of being swapped out and attempts to compress and store them in a
  7. * RAM-based memory pool. This can result in a significant I/O reduction on
  8. * the swap device and, in the case where decompressing from RAM is faster
  9. * than reading from the swap device, can also improve workload performance.
  10. *
  11. * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com>
  12. */
  13. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  14. #include <linux/module.h>
  15. #include <linux/cpu.h>
  16. #include <linux/highmem.h>
  17. #include <linux/slab.h>
  18. #include <linux/spinlock.h>
  19. #include <linux/types.h>
  20. #include <linux/atomic.h>
  21. #include <linux/frontswap.h>
  22. #include <linux/rbtree.h>
  23. #include <linux/swap.h>
  24. #include <linux/crypto.h>
  25. #include <linux/scatterlist.h>
  26. #include <linux/mempool.h>
  27. #include <linux/zpool.h>
  28. #include <crypto/acompress.h>
  29. #include <linux/mm_types.h>
  30. #include <linux/page-flags.h>
  31. #include <linux/swapops.h>
  32. #include <linux/writeback.h>
  33. #include <linux/pagemap.h>
  34. #include <linux/workqueue.h>
  35. #include "swap.h"
  36. /*********************************
  37. * statistics
  38. **********************************/
  39. /* Total bytes used by the compressed storage */
  40. u64 zswap_pool_total_size;
  41. /* The number of compressed pages currently stored in zswap */
  42. atomic_t zswap_stored_pages = ATOMIC_INIT(0);
  43. /* The number of same-value filled pages currently stored in zswap */
  44. static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);
  45. /*
  46. * The statistics below are not protected from concurrent access for
  47. * performance reasons so they may not be a 100% accurate. However,
  48. * they do provide useful information on roughly how many times a
  49. * certain event is occurring.
  50. */
  51. /* Pool limit was hit (see zswap_max_pool_percent) */
  52. static u64 zswap_pool_limit_hit;
  53. /* Pages written back when pool limit was reached */
  54. static u64 zswap_written_back_pages;
  55. /* Store failed due to a reclaim failure after pool limit was reached */
  56. static u64 zswap_reject_reclaim_fail;
  57. /* Compressed page was too big for the allocator to (optimally) store */
  58. static u64 zswap_reject_compress_poor;
  59. /* Store failed because underlying allocator could not get memory */
  60. static u64 zswap_reject_alloc_fail;
  61. /* Store failed because the entry metadata could not be allocated (rare) */
  62. static u64 zswap_reject_kmemcache_fail;
  63. /* Duplicate store was encountered (rare) */
  64. static u64 zswap_duplicate_entry;
  65. /* Shrinker work queue */
  66. static struct workqueue_struct *shrink_wq;
  67. /* Pool limit was hit, we need to calm down */
  68. static bool zswap_pool_reached_full;
  69. /*********************************
  70. * tunables
  71. **********************************/
  72. #define ZSWAP_PARAM_UNSET ""
  73. /* Enable/disable zswap */
  74. static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
  75. static int zswap_enabled_param_set(const char *,
  76. const struct kernel_param *);
  77. static const struct kernel_param_ops zswap_enabled_param_ops = {
  78. .set = zswap_enabled_param_set,
  79. .get = param_get_bool,
  80. };
  81. module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
  82. /* Crypto compressor to use */
  83. static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
  84. static int zswap_compressor_param_set(const char *,
  85. const struct kernel_param *);
  86. static const struct kernel_param_ops zswap_compressor_param_ops = {
  87. .set = zswap_compressor_param_set,
  88. .get = param_get_charp,
  89. .free = param_free_charp,
  90. };
  91. module_param_cb(compressor, &zswap_compressor_param_ops,
  92. &zswap_compressor, 0644);
  93. /* Compressed storage zpool to use */
  94. static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
  95. static int zswap_zpool_param_set(const char *, const struct kernel_param *);
  96. static const struct kernel_param_ops zswap_zpool_param_ops = {
  97. .set = zswap_zpool_param_set,
  98. .get = param_get_charp,
  99. .free = param_free_charp,
  100. };
  101. module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
  102. /* The maximum percentage of memory that the compressed pool can occupy */
  103. static unsigned int zswap_max_pool_percent = 20;
  104. module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
  105. /* The threshold for accepting new pages after the max_pool_percent was hit */
  106. static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
  107. module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
  108. uint, 0644);
  109. /*
  110. * Enable/disable handling same-value filled pages (enabled by default).
  111. * If disabled every page is considered non-same-value filled.
  112. */
  113. static bool zswap_same_filled_pages_enabled = true;
  114. module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
  115. bool, 0644);
  116. /* Enable/disable handling non-same-value filled pages (enabled by default) */
  117. static bool zswap_non_same_filled_pages_enabled = true;
  118. module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
  119. bool, 0644);
  120. /*********************************
  121. * data structures
  122. **********************************/
  123. struct crypto_acomp_ctx {
  124. struct crypto_acomp *acomp;
  125. struct acomp_req *req;
  126. struct crypto_wait wait;
  127. u8 *dstmem;
  128. struct mutex *mutex;
  129. };
  130. struct zswap_pool {
  131. struct zpool *zpool;
  132. struct crypto_acomp_ctx __percpu *acomp_ctx;
  133. struct kref kref;
  134. struct list_head list;
  135. struct work_struct release_work;
  136. struct work_struct shrink_work;
  137. struct hlist_node node;
  138. char tfm_name[CRYPTO_MAX_ALG_NAME];
  139. };
  140. /*
  141. * struct zswap_entry
  142. *
  143. * This structure contains the metadata for tracking a single compressed
  144. * page within zswap.
  145. *
  146. * rbnode - links the entry into red-black tree for the appropriate swap type
  147. * offset - the swap offset for the entry. Index into the red-black tree.
  148. * refcount - the number of outstanding reference to the entry. This is needed
  149. * to protect against premature freeing of the entry by code
  150. * concurrent calls to load, invalidate, and writeback. The lock
  151. * for the zswap_tree structure that contains the entry must
  152. * be held while changing the refcount. Since the lock must
  153. * be held, there is no reason to also make refcount atomic.
  154. * length - the length in bytes of the compressed page data. Needed during
  155. * decompression. For a same value filled page length is 0.
  156. * pool - the zswap_pool the entry's data is in
  157. * handle - zpool allocation handle that stores the compressed page data
  158. * value - value of the same-value filled pages which have same content
  159. */
  160. struct zswap_entry {
  161. struct rb_node rbnode;
  162. pgoff_t offset;
  163. int refcount;
  164. unsigned int length;
  165. struct zswap_pool *pool;
  166. union {
  167. unsigned long handle;
  168. unsigned long value;
  169. };
  170. struct obj_cgroup *objcg;
  171. };
  172. struct zswap_header {
  173. swp_entry_t swpentry;
  174. };
  175. /*
  176. * The tree lock in the zswap_tree struct protects a few things:
  177. * - the rbtree
  178. * - the refcount field of each entry in the tree
  179. */
  180. struct zswap_tree {
  181. struct rb_root rbroot;
  182. spinlock_t lock;
  183. };
  184. static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
  185. /* RCU-protected iteration */
  186. static LIST_HEAD(zswap_pools);
  187. /* protects zswap_pools list modification */
  188. static DEFINE_SPINLOCK(zswap_pools_lock);
  189. /* pool counter to provide unique names to zpool */
  190. static atomic_t zswap_pools_count = ATOMIC_INIT(0);
  191. /* used by param callback function */
  192. static bool zswap_init_started;
  193. /* fatal error during init */
  194. static bool zswap_init_failed;
  195. /* init completed, but couldn't create the initial pool */
  196. static bool zswap_has_pool;
  197. /*********************************
  198. * helpers and fwd declarations
  199. **********************************/
  200. #define zswap_pool_debug(msg, p) \
  201. pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
  202. zpool_get_type((p)->zpool))
  203. static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
  204. static int zswap_pool_get(struct zswap_pool *pool);
  205. static void zswap_pool_put(struct zswap_pool *pool);
  206. static const struct zpool_ops zswap_zpool_ops = {
  207. .evict = zswap_writeback_entry
  208. };
  209. static bool zswap_is_full(void)
  210. {
  211. return totalram_pages() * zswap_max_pool_percent / 100 <
  212. DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
  213. }
  214. static bool zswap_can_accept(void)
  215. {
  216. return totalram_pages() * zswap_accept_thr_percent / 100 *
  217. zswap_max_pool_percent / 100 >
  218. DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
  219. }
  220. static void zswap_update_total_size(void)
  221. {
  222. struct zswap_pool *pool;
  223. u64 total = 0;
  224. rcu_read_lock();
  225. list_for_each_entry_rcu(pool, &zswap_pools, list)
  226. total += zpool_get_total_size(pool->zpool);
  227. rcu_read_unlock();
  228. zswap_pool_total_size = total;
  229. }
  230. /*********************************
  231. * zswap entry functions
  232. **********************************/
  233. static struct kmem_cache *zswap_entry_cache;
  234. static int __init zswap_entry_cache_create(void)
  235. {
  236. zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
  237. return zswap_entry_cache == NULL;
  238. }
  239. static void __init zswap_entry_cache_destroy(void)
  240. {
  241. kmem_cache_destroy(zswap_entry_cache);
  242. }
  243. static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
  244. {
  245. struct zswap_entry *entry;
  246. entry = kmem_cache_alloc(zswap_entry_cache, gfp);
  247. if (!entry)
  248. return NULL;
  249. entry->refcount = 1;
  250. RB_CLEAR_NODE(&entry->rbnode);
  251. return entry;
  252. }
  253. static void zswap_entry_cache_free(struct zswap_entry *entry)
  254. {
  255. kmem_cache_free(zswap_entry_cache, entry);
  256. }
  257. /*********************************
  258. * rbtree functions
  259. **********************************/
  260. static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
  261. {
  262. struct rb_node *node = root->rb_node;
  263. struct zswap_entry *entry;
  264. while (node) {
  265. entry = rb_entry(node, struct zswap_entry, rbnode);
  266. if (entry->offset > offset)
  267. node = node->rb_left;
  268. else if (entry->offset < offset)
  269. node = node->rb_right;
  270. else
  271. return entry;
  272. }
  273. return NULL;
  274. }
  275. /*
  276. * In the case that a entry with the same offset is found, a pointer to
  277. * the existing entry is stored in dupentry and the function returns -EEXIST
  278. */
  279. static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
  280. struct zswap_entry **dupentry)
  281. {
  282. struct rb_node **link = &root->rb_node, *parent = NULL;
  283. struct zswap_entry *myentry;
  284. while (*link) {
  285. parent = *link;
  286. myentry = rb_entry(parent, struct zswap_entry, rbnode);
  287. if (myentry->offset > entry->offset)
  288. link = &(*link)->rb_left;
  289. else if (myentry->offset < entry->offset)
  290. link = &(*link)->rb_right;
  291. else {
  292. *dupentry = myentry;
  293. return -EEXIST;
  294. }
  295. }
  296. rb_link_node(&entry->rbnode, parent, link);
  297. rb_insert_color(&entry->rbnode, root);
  298. return 0;
  299. }
  300. static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
  301. {
  302. if (!RB_EMPTY_NODE(&entry->rbnode)) {
  303. rb_erase(&entry->rbnode, root);
  304. RB_CLEAR_NODE(&entry->rbnode);
  305. }
  306. }
  307. /*
  308. * Carries out the common pattern of freeing and entry's zpool allocation,
  309. * freeing the entry itself, and decrementing the number of stored pages.
  310. */
  311. static void zswap_free_entry(struct zswap_entry *entry)
  312. {
  313. if (entry->objcg) {
  314. obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
  315. obj_cgroup_put(entry->objcg);
  316. }
  317. if (!entry->length)
  318. atomic_dec(&zswap_same_filled_pages);
  319. else {
  320. zpool_free(entry->pool->zpool, entry->handle);
  321. zswap_pool_put(entry->pool);
  322. }
  323. zswap_entry_cache_free(entry);
  324. atomic_dec(&zswap_stored_pages);
  325. zswap_update_total_size();
  326. }
  327. /* caller must hold the tree lock */
  328. static void zswap_entry_get(struct zswap_entry *entry)
  329. {
  330. entry->refcount++;
  331. }
  332. /* caller must hold the tree lock
  333. * remove from the tree and free it, if nobody reference the entry
  334. */
  335. static void zswap_entry_put(struct zswap_tree *tree,
  336. struct zswap_entry *entry)
  337. {
  338. int refcount = --entry->refcount;
  339. BUG_ON(refcount < 0);
  340. if (refcount == 0) {
  341. zswap_rb_erase(&tree->rbroot, entry);
  342. zswap_free_entry(entry);
  343. }
  344. }
  345. /* caller must hold the tree lock */
  346. static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
  347. pgoff_t offset)
  348. {
  349. struct zswap_entry *entry;
  350. entry = zswap_rb_search(root, offset);
  351. if (entry)
  352. zswap_entry_get(entry);
  353. return entry;
  354. }
  355. /*********************************
  356. * per-cpu code
  357. **********************************/
  358. static DEFINE_PER_CPU(u8 *, zswap_dstmem);
  359. /*
  360. * If users dynamically change the zpool type and compressor at runtime, i.e.
  361. * zswap is running, zswap can have more than one zpool on one cpu, but they
  362. * are sharing dtsmem. So we need this mutex to be per-cpu.
  363. */
  364. static DEFINE_PER_CPU(struct mutex *, zswap_mutex);
  365. static int zswap_dstmem_prepare(unsigned int cpu)
  366. {
  367. struct mutex *mutex;
  368. u8 *dst;
  369. dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
  370. if (!dst)
  371. return -ENOMEM;
  372. mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
  373. if (!mutex) {
  374. kfree(dst);
  375. return -ENOMEM;
  376. }
  377. mutex_init(mutex);
  378. per_cpu(zswap_dstmem, cpu) = dst;
  379. per_cpu(zswap_mutex, cpu) = mutex;
  380. return 0;
  381. }
  382. static int zswap_dstmem_dead(unsigned int cpu)
  383. {
  384. struct mutex *mutex;
  385. u8 *dst;
  386. mutex = per_cpu(zswap_mutex, cpu);
  387. kfree(mutex);
  388. per_cpu(zswap_mutex, cpu) = NULL;
  389. dst = per_cpu(zswap_dstmem, cpu);
  390. kfree(dst);
  391. per_cpu(zswap_dstmem, cpu) = NULL;
  392. return 0;
  393. }
  394. static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
  395. {
  396. struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
  397. struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
  398. struct crypto_acomp *acomp;
  399. struct acomp_req *req;
  400. acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
  401. if (IS_ERR(acomp)) {
  402. pr_err("could not alloc crypto acomp %s : %ld\n",
  403. pool->tfm_name, PTR_ERR(acomp));
  404. return PTR_ERR(acomp);
  405. }
  406. acomp_ctx->acomp = acomp;
  407. req = acomp_request_alloc(acomp_ctx->acomp);
  408. if (!req) {
  409. pr_err("could not alloc crypto acomp_request %s\n",
  410. pool->tfm_name);
  411. crypto_free_acomp(acomp_ctx->acomp);
  412. return -ENOMEM;
  413. }
  414. acomp_ctx->req = req;
  415. crypto_init_wait(&acomp_ctx->wait);
  416. /*
  417. * if the backend of acomp is async zip, crypto_req_done() will wakeup
  418. * crypto_wait_req(); if the backend of acomp is scomp, the callback
  419. * won't be called, crypto_wait_req() will return without blocking.
  420. */
  421. acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
  422. crypto_req_done, &acomp_ctx->wait);
  423. acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
  424. acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);
  425. return 0;
  426. }
  427. static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
  428. {
  429. struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
  430. struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
  431. if (!IS_ERR_OR_NULL(acomp_ctx)) {
  432. if (!IS_ERR_OR_NULL(acomp_ctx->req))
  433. acomp_request_free(acomp_ctx->req);
  434. if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
  435. crypto_free_acomp(acomp_ctx->acomp);
  436. }
  437. return 0;
  438. }
  439. /*********************************
  440. * pool functions
  441. **********************************/
  442. static struct zswap_pool *__zswap_pool_current(void)
  443. {
  444. struct zswap_pool *pool;
  445. pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
  446. WARN_ONCE(!pool && zswap_has_pool,
  447. "%s: no page storage pool!\n", __func__);
  448. return pool;
  449. }
  450. static struct zswap_pool *zswap_pool_current(void)
  451. {
  452. assert_spin_locked(&zswap_pools_lock);
  453. return __zswap_pool_current();
  454. }
  455. static struct zswap_pool *zswap_pool_current_get(void)
  456. {
  457. struct zswap_pool *pool;
  458. rcu_read_lock();
  459. pool = __zswap_pool_current();
  460. if (!zswap_pool_get(pool))
  461. pool = NULL;
  462. rcu_read_unlock();
  463. return pool;
  464. }
  465. static struct zswap_pool *zswap_pool_last_get(void)
  466. {
  467. struct zswap_pool *pool, *last = NULL;
  468. rcu_read_lock();
  469. list_for_each_entry_rcu(pool, &zswap_pools, list)
  470. last = pool;
  471. WARN_ONCE(!last && zswap_has_pool,
  472. "%s: no page storage pool!\n", __func__);
  473. if (!zswap_pool_get(last))
  474. last = NULL;
  475. rcu_read_unlock();
  476. return last;
  477. }
  478. /* type and compressor must be null-terminated */
  479. static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
  480. {
  481. struct zswap_pool *pool;
  482. assert_spin_locked(&zswap_pools_lock);
  483. list_for_each_entry_rcu(pool, &zswap_pools, list) {
  484. if (strcmp(pool->tfm_name, compressor))
  485. continue;
  486. if (strcmp(zpool_get_type(pool->zpool), type))
  487. continue;
  488. /* if we can't get it, it's about to be destroyed */
  489. if (!zswap_pool_get(pool))
  490. continue;
  491. return pool;
  492. }
  493. return NULL;
  494. }
  495. static void shrink_worker(struct work_struct *w)
  496. {
  497. struct zswap_pool *pool = container_of(w, typeof(*pool),
  498. shrink_work);
  499. if (zpool_shrink(pool->zpool, 1, NULL))
  500. zswap_reject_reclaim_fail++;
  501. zswap_pool_put(pool);
  502. }
  503. static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
  504. {
  505. struct zswap_pool *pool;
  506. char name[38]; /* 'zswap' + 32 char (max) num + \0 */
  507. gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
  508. int ret;
  509. if (!zswap_has_pool) {
  510. /* if either are unset, pool initialization failed, and we
  511. * need both params to be set correctly before trying to
  512. * create a pool.
  513. */
  514. if (!strcmp(type, ZSWAP_PARAM_UNSET))
  515. return NULL;
  516. if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
  517. return NULL;
  518. }
  519. pool = kzalloc(sizeof(*pool), GFP_KERNEL);
  520. if (!pool)
  521. return NULL;
  522. /* unique name for each pool specifically required by zsmalloc */
  523. snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
  524. pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
  525. if (!pool->zpool) {
  526. pr_err("%s zpool not available\n", type);
  527. goto error;
  528. }
  529. pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
  530. strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
  531. pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
  532. if (!pool->acomp_ctx) {
  533. pr_err("percpu alloc failed\n");
  534. goto error;
  535. }
  536. ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
  537. &pool->node);
  538. if (ret)
  539. goto error;
  540. pr_debug("using %s compressor\n", pool->tfm_name);
  541. /* being the current pool takes 1 ref; this func expects the
  542. * caller to always add the new pool as the current pool
  543. */
  544. kref_init(&pool->kref);
  545. INIT_LIST_HEAD(&pool->list);
  546. INIT_WORK(&pool->shrink_work, shrink_worker);
  547. zswap_pool_debug("created", pool);
  548. return pool;
  549. error:
  550. if (pool->acomp_ctx)
  551. free_percpu(pool->acomp_ctx);
  552. if (pool->zpool)
  553. zpool_destroy_pool(pool->zpool);
  554. kfree(pool);
  555. return NULL;
  556. }
  557. static __init struct zswap_pool *__zswap_pool_create_fallback(void)
  558. {
  559. bool has_comp, has_zpool;
  560. has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
  561. if (!has_comp && strcmp(zswap_compressor,
  562. CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
  563. pr_err("compressor %s not available, using default %s\n",
  564. zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
  565. param_free_charp(&zswap_compressor);
  566. zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
  567. has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
  568. }
  569. if (!has_comp) {
  570. pr_err("default compressor %s not available\n",
  571. zswap_compressor);
  572. param_free_charp(&zswap_compressor);
  573. zswap_compressor = ZSWAP_PARAM_UNSET;
  574. }
  575. has_zpool = zpool_has_pool(zswap_zpool_type);
  576. if (!has_zpool && strcmp(zswap_zpool_type,
  577. CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
  578. pr_err("zpool %s not available, using default %s\n",
  579. zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
  580. param_free_charp(&zswap_zpool_type);
  581. zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
  582. has_zpool = zpool_has_pool(zswap_zpool_type);
  583. }
  584. if (!has_zpool) {
  585. pr_err("default zpool %s not available\n",
  586. zswap_zpool_type);
  587. param_free_charp(&zswap_zpool_type);
  588. zswap_zpool_type = ZSWAP_PARAM_UNSET;
  589. }
  590. if (!has_comp || !has_zpool)
  591. return NULL;
  592. return zswap_pool_create(zswap_zpool_type, zswap_compressor);
  593. }
  594. static void zswap_pool_destroy(struct zswap_pool *pool)
  595. {
  596. zswap_pool_debug("destroying", pool);
  597. cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
  598. free_percpu(pool->acomp_ctx);
  599. zpool_destroy_pool(pool->zpool);
  600. kfree(pool);
  601. }
  602. static int __must_check zswap_pool_get(struct zswap_pool *pool)
  603. {
  604. if (!pool)
  605. return 0;
  606. return kref_get_unless_zero(&pool->kref);
  607. }
  608. static void __zswap_pool_release(struct work_struct *work)
  609. {
  610. struct zswap_pool *pool = container_of(work, typeof(*pool),
  611. release_work);
  612. synchronize_rcu();
  613. /* nobody should have been able to get a kref... */
  614. WARN_ON(kref_get_unless_zero(&pool->kref));
  615. /* pool is now off zswap_pools list and has no references. */
  616. zswap_pool_destroy(pool);
  617. }
  618. static void __zswap_pool_empty(struct kref *kref)
  619. {
  620. struct zswap_pool *pool;
  621. pool = container_of(kref, typeof(*pool), kref);
  622. spin_lock(&zswap_pools_lock);
  623. WARN_ON(pool == zswap_pool_current());
  624. list_del_rcu(&pool->list);
  625. INIT_WORK(&pool->release_work, __zswap_pool_release);
  626. schedule_work(&pool->release_work);
  627. spin_unlock(&zswap_pools_lock);
  628. }
  629. static void zswap_pool_put(struct zswap_pool *pool)
  630. {
  631. kref_put(&pool->kref, __zswap_pool_empty);
  632. }
  633. /*********************************
  634. * param callbacks
  635. **********************************/
  636. /* val must be a null-terminated string */
  637. static int __zswap_param_set(const char *val, const struct kernel_param *kp,
  638. char *type, char *compressor)
  639. {
  640. struct zswap_pool *pool, *put_pool = NULL;
  641. char *s = strstrip((char *)val);
  642. int ret;
  643. if (zswap_init_failed) {
  644. pr_err("can't set param, initialization failed\n");
  645. return -ENODEV;
  646. }
  647. /* no change required */
  648. if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
  649. return 0;
  650. /* if this is load-time (pre-init) param setting,
  651. * don't create a pool; that's done during init.
  652. */
  653. if (!zswap_init_started)
  654. return param_set_charp(s, kp);
  655. if (!type) {
  656. if (!zpool_has_pool(s)) {
  657. pr_err("zpool %s not available\n", s);
  658. return -ENOENT;
  659. }
  660. type = s;
  661. } else if (!compressor) {
  662. if (!crypto_has_acomp(s, 0, 0)) {
  663. pr_err("compressor %s not available\n", s);
  664. return -ENOENT;
  665. }
  666. compressor = s;
  667. } else {
  668. WARN_ON(1);
  669. return -EINVAL;
  670. }
  671. spin_lock(&zswap_pools_lock);
  672. pool = zswap_pool_find_get(type, compressor);
  673. if (pool) {
  674. zswap_pool_debug("using existing", pool);
  675. WARN_ON(pool == zswap_pool_current());
  676. list_del_rcu(&pool->list);
  677. }
  678. spin_unlock(&zswap_pools_lock);
  679. if (!pool)
  680. pool = zswap_pool_create(type, compressor);
  681. if (pool)
  682. ret = param_set_charp(s, kp);
  683. else
  684. ret = -EINVAL;
  685. spin_lock(&zswap_pools_lock);
  686. if (!ret) {
  687. put_pool = zswap_pool_current();
  688. list_add_rcu(&pool->list, &zswap_pools);
  689. zswap_has_pool = true;
  690. } else if (pool) {
  691. /* add the possibly pre-existing pool to the end of the pools
  692. * list; if it's new (and empty) then it'll be removed and
  693. * destroyed by the put after we drop the lock
  694. */
  695. list_add_tail_rcu(&pool->list, &zswap_pools);
  696. put_pool = pool;
  697. }
  698. spin_unlock(&zswap_pools_lock);
  699. if (!zswap_has_pool && !pool) {
  700. /* if initial pool creation failed, and this pool creation also
  701. * failed, maybe both compressor and zpool params were bad.
  702. * Allow changing this param, so pool creation will succeed
  703. * when the other param is changed. We already verified this
  704. * param is ok in the zpool_has_pool() or crypto_has_acomp()
  705. * checks above.
  706. */
  707. ret = param_set_charp(s, kp);
  708. }
  709. /* drop the ref from either the old current pool,
  710. * or the new pool we failed to add
  711. */
  712. if (put_pool)
  713. zswap_pool_put(put_pool);
  714. return ret;
  715. }
  716. static int zswap_compressor_param_set(const char *val,
  717. const struct kernel_param *kp)
  718. {
  719. return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
  720. }
  721. static int zswap_zpool_param_set(const char *val,
  722. const struct kernel_param *kp)
  723. {
  724. return __zswap_param_set(val, kp, NULL, zswap_compressor);
  725. }
  726. static int zswap_enabled_param_set(const char *val,
  727. const struct kernel_param *kp)
  728. {
  729. if (zswap_init_failed) {
  730. pr_err("can't enable, initialization failed\n");
  731. return -ENODEV;
  732. }
  733. if (!zswap_has_pool && zswap_init_started) {
  734. pr_err("can't enable, no pool configured\n");
  735. return -ENODEV;
  736. }
  737. return param_set_bool(val, kp);
  738. }
  739. /*********************************
  740. * writeback code
  741. **********************************/
  742. /* return enum for zswap_get_swap_cache_page */
  743. enum zswap_get_swap_ret {
  744. ZSWAP_SWAPCACHE_NEW,
  745. ZSWAP_SWAPCACHE_EXIST,
  746. ZSWAP_SWAPCACHE_FAIL,
  747. };
  748. /*
  749. * zswap_get_swap_cache_page
  750. *
  751. * This is an adaption of read_swap_cache_async()
  752. *
  753. * This function tries to find a page with the given swap entry
  754. * in the swapper_space address space (the swap cache). If the page
  755. * is found, it is returned in retpage. Otherwise, a page is allocated,
  756. * added to the swap cache, and returned in retpage.
  757. *
  758. * If success, the swap cache page is returned in retpage
  759. * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
  760. * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
  761. * the new page is added to swapcache and locked
  762. * Returns ZSWAP_SWAPCACHE_FAIL on error
  763. */
  764. static int zswap_get_swap_cache_page(swp_entry_t entry,
  765. struct page **retpage)
  766. {
  767. bool page_was_allocated;
  768. *retpage = __read_swap_cache_async(entry, GFP_KERNEL,
  769. NULL, 0, &page_was_allocated);
  770. if (page_was_allocated)
  771. return ZSWAP_SWAPCACHE_NEW;
  772. if (!*retpage)
  773. return ZSWAP_SWAPCACHE_FAIL;
  774. return ZSWAP_SWAPCACHE_EXIST;
  775. }
  776. /*
  777. * Attempts to free an entry by adding a page to the swap cache,
  778. * decompressing the entry data into the page, and issuing a
  779. * bio write to write the page back to the swap device.
  780. *
  781. * This can be thought of as a "resumed writeback" of the page
  782. * to the swap device. We are basically resuming the same swap
  783. * writeback path that was intercepted with the frontswap_store()
  784. * in the first place. After the page has been decompressed into
  785. * the swap cache, the compressed version stored by zswap can be
  786. * freed.
  787. */
  788. static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
  789. {
  790. struct zswap_header *zhdr;
  791. swp_entry_t swpentry;
  792. struct zswap_tree *tree;
  793. pgoff_t offset;
  794. struct zswap_entry *entry;
  795. struct page *page;
  796. struct scatterlist input, output;
  797. struct crypto_acomp_ctx *acomp_ctx;
  798. u8 *src, *tmp = NULL;
  799. unsigned int dlen;
  800. int ret;
  801. struct writeback_control wbc = {
  802. .sync_mode = WB_SYNC_NONE,
  803. };
  804. if (!zpool_can_sleep_mapped(pool)) {
  805. tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC);
  806. if (!tmp)
  807. return -ENOMEM;
  808. }
  809. /* extract swpentry from data */
  810. zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
  811. swpentry = zhdr->swpentry; /* here */
  812. tree = zswap_trees[swp_type(swpentry)];
  813. offset = swp_offset(swpentry);
  814. /* find and ref zswap entry */
  815. spin_lock(&tree->lock);
  816. entry = zswap_entry_find_get(&tree->rbroot, offset);
  817. if (!entry) {
  818. /* entry was invalidated */
  819. spin_unlock(&tree->lock);
  820. zpool_unmap_handle(pool, handle);
  821. kfree(tmp);
  822. return 0;
  823. }
  824. spin_unlock(&tree->lock);
  825. BUG_ON(offset != entry->offset);
  826. src = (u8 *)zhdr + sizeof(struct zswap_header);
  827. if (!zpool_can_sleep_mapped(pool)) {
  828. memcpy(tmp, src, entry->length);
  829. src = tmp;
  830. zpool_unmap_handle(pool, handle);
  831. }
  832. /* try to allocate swap cache page */
  833. switch (zswap_get_swap_cache_page(swpentry, &page)) {
  834. case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
  835. ret = -ENOMEM;
  836. goto fail;
  837. case ZSWAP_SWAPCACHE_EXIST:
  838. /* page is already in the swap cache, ignore for now */
  839. put_page(page);
  840. ret = -EEXIST;
  841. goto fail;
  842. case ZSWAP_SWAPCACHE_NEW: /* page is locked */
  843. /*
  844. * Having a local reference to the zswap entry doesn't exclude
  845. * swapping from invalidating and recycling the swap slot. Once
  846. * the swapcache is secured against concurrent swapping to and
  847. * from the slot, recheck that the entry is still current before
  848. * writing.
  849. */
  850. spin_lock(&tree->lock);
  851. if (zswap_rb_search(&tree->rbroot, entry->offset) != entry) {
  852. spin_unlock(&tree->lock);
  853. delete_from_swap_cache(page_folio(page));
  854. ret = -ENOMEM;
  855. goto fail;
  856. }
  857. spin_unlock(&tree->lock);
  858. /* decompress */
  859. acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
  860. dlen = PAGE_SIZE;
  861. mutex_lock(acomp_ctx->mutex);
  862. sg_init_one(&input, src, entry->length);
  863. sg_init_table(&output, 1);
  864. sg_set_page(&output, page, PAGE_SIZE, 0);
  865. acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
  866. ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
  867. dlen = acomp_ctx->req->dlen;
  868. mutex_unlock(acomp_ctx->mutex);
  869. BUG_ON(ret);
  870. BUG_ON(dlen != PAGE_SIZE);
  871. /* page is up to date */
  872. SetPageUptodate(page);
  873. }
  874. /* move it to the tail of the inactive list after end_writeback */
  875. SetPageReclaim(page);
  876. /* start writeback */
  877. __swap_writepage(page, &wbc);
  878. put_page(page);
  879. zswap_written_back_pages++;
  880. spin_lock(&tree->lock);
  881. /* drop local reference */
  882. zswap_entry_put(tree, entry);
  883. /*
  884. * There are two possible situations for entry here:
  885. * (1) refcount is 1(normal case), entry is valid and on the tree
  886. * (2) refcount is 0, entry is freed and not on the tree
  887. * because invalidate happened during writeback
  888. * search the tree and free the entry if find entry
  889. */
  890. if (entry == zswap_rb_search(&tree->rbroot, offset))
  891. zswap_entry_put(tree, entry);
  892. spin_unlock(&tree->lock);
  893. goto end;
  894. /*
  895. * if we get here due to ZSWAP_SWAPCACHE_EXIST
  896. * a load may be happening concurrently.
  897. * it is safe and okay to not free the entry.
  898. * if we free the entry in the following put
  899. * it is also okay to return !0
  900. */
  901. fail:
  902. spin_lock(&tree->lock);
  903. zswap_entry_put(tree, entry);
  904. spin_unlock(&tree->lock);
  905. end:
  906. if (zpool_can_sleep_mapped(pool))
  907. zpool_unmap_handle(pool, handle);
  908. else
  909. kfree(tmp);
  910. return ret;
  911. }
  912. static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
  913. {
  914. unsigned int pos;
  915. unsigned long *page;
  916. page = (unsigned long *)ptr;
  917. for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
  918. if (page[pos] != page[0])
  919. return 0;
  920. }
  921. *value = page[0];
  922. return 1;
  923. }
  924. static void zswap_fill_page(void *ptr, unsigned long value)
  925. {
  926. unsigned long *page;
  927. page = (unsigned long *)ptr;
  928. memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
  929. }
  930. /*********************************
  931. * frontswap hooks
  932. **********************************/
  933. /* attempts to compress and store an single page */
  934. static int zswap_frontswap_store(unsigned type, pgoff_t offset,
  935. struct page *page)
  936. {
  937. struct zswap_tree *tree = zswap_trees[type];
  938. struct zswap_entry *entry, *dupentry;
  939. struct scatterlist input, output;
  940. struct crypto_acomp_ctx *acomp_ctx;
  941. struct obj_cgroup *objcg = NULL;
  942. struct zswap_pool *pool;
  943. int ret;
  944. unsigned int hlen, dlen = PAGE_SIZE;
  945. unsigned long handle, value;
  946. char *buf;
  947. u8 *src, *dst;
  948. struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
  949. gfp_t gfp;
  950. /* THP isn't supported */
  951. if (PageTransHuge(page)) {
  952. ret = -EINVAL;
  953. goto reject;
  954. }
  955. if (!zswap_enabled || !tree) {
  956. ret = -ENODEV;
  957. goto reject;
  958. }
  959. /*
  960. * XXX: zswap reclaim does not work with cgroups yet. Without a
  961. * cgroup-aware entry LRU, we will push out entries system-wide based on
  962. * local cgroup limits.
  963. */
  964. objcg = get_obj_cgroup_from_page(page);
  965. if (objcg && !obj_cgroup_may_zswap(objcg)) {
  966. ret = -ENOMEM;
  967. goto reject;
  968. }
  969. /* reclaim space if needed */
  970. if (zswap_is_full()) {
  971. zswap_pool_limit_hit++;
  972. zswap_pool_reached_full = true;
  973. goto shrink;
  974. }
  975. if (zswap_pool_reached_full) {
  976. if (!zswap_can_accept()) {
  977. ret = -ENOMEM;
  978. goto reject;
  979. } else
  980. zswap_pool_reached_full = false;
  981. }
  982. /* allocate entry */
  983. entry = zswap_entry_cache_alloc(GFP_KERNEL);
  984. if (!entry) {
  985. zswap_reject_kmemcache_fail++;
  986. ret = -ENOMEM;
  987. goto reject;
  988. }
  989. if (zswap_same_filled_pages_enabled) {
  990. src = kmap_atomic(page);
  991. if (zswap_is_page_same_filled(src, &value)) {
  992. kunmap_atomic(src);
  993. entry->offset = offset;
  994. entry->length = 0;
  995. entry->value = value;
  996. atomic_inc(&zswap_same_filled_pages);
  997. goto insert_entry;
  998. }
  999. kunmap_atomic(src);
  1000. }
  1001. if (!zswap_non_same_filled_pages_enabled) {
  1002. ret = -EINVAL;
  1003. goto freepage;
  1004. }
  1005. /* if entry is successfully added, it keeps the reference */
  1006. entry->pool = zswap_pool_current_get();
  1007. if (!entry->pool) {
  1008. ret = -EINVAL;
  1009. goto freepage;
  1010. }
  1011. /* compress */
  1012. acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
  1013. mutex_lock(acomp_ctx->mutex);
  1014. dst = acomp_ctx->dstmem;
  1015. sg_init_table(&input, 1);
  1016. sg_set_page(&input, page, PAGE_SIZE, 0);
  1017. /* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
  1018. sg_init_one(&output, dst, PAGE_SIZE * 2);
  1019. acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
  1020. /*
  1021. * it maybe looks a little bit silly that we send an asynchronous request,
  1022. * then wait for its completion synchronously. This makes the process look
  1023. * synchronous in fact.
  1024. * Theoretically, acomp supports users send multiple acomp requests in one
  1025. * acomp instance, then get those requests done simultaneously. but in this
  1026. * case, frontswap actually does store and load page by page, there is no
  1027. * existing method to send the second page before the first page is done
  1028. * in one thread doing frontswap.
  1029. * but in different threads running on different cpu, we have different
  1030. * acomp instance, so multiple threads can do (de)compression in parallel.
  1031. */
  1032. ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
  1033. dlen = acomp_ctx->req->dlen;
  1034. if (ret) {
  1035. ret = -EINVAL;
  1036. goto put_dstmem;
  1037. }
  1038. /* store */
  1039. hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
  1040. gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
  1041. if (zpool_malloc_support_movable(entry->pool->zpool))
  1042. gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
  1043. ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
  1044. if (ret == -ENOSPC) {
  1045. zswap_reject_compress_poor++;
  1046. goto put_dstmem;
  1047. }
  1048. if (ret) {
  1049. zswap_reject_alloc_fail++;
  1050. goto put_dstmem;
  1051. }
  1052. buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO);
  1053. memcpy(buf, &zhdr, hlen);
  1054. memcpy(buf + hlen, dst, dlen);
  1055. zpool_unmap_handle(entry->pool->zpool, handle);
  1056. mutex_unlock(acomp_ctx->mutex);
  1057. /* populate entry */
  1058. entry->offset = offset;
  1059. entry->handle = handle;
  1060. entry->length = dlen;
  1061. insert_entry:
  1062. entry->objcg = objcg;
  1063. if (objcg) {
  1064. obj_cgroup_charge_zswap(objcg, entry->length);
  1065. /* Account before objcg ref is moved to tree */
  1066. count_objcg_event(objcg, ZSWPOUT);
  1067. }
  1068. /* map */
  1069. spin_lock(&tree->lock);
  1070. do {
  1071. ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
  1072. if (ret == -EEXIST) {
  1073. zswap_duplicate_entry++;
  1074. /* remove from rbtree */
  1075. zswap_rb_erase(&tree->rbroot, dupentry);
  1076. zswap_entry_put(tree, dupentry);
  1077. }
  1078. } while (ret == -EEXIST);
  1079. spin_unlock(&tree->lock);
  1080. /* update stats */
  1081. atomic_inc(&zswap_stored_pages);
  1082. zswap_update_total_size();
  1083. count_vm_event(ZSWPOUT);
  1084. return 0;
  1085. put_dstmem:
  1086. mutex_unlock(acomp_ctx->mutex);
  1087. zswap_pool_put(entry->pool);
  1088. freepage:
  1089. zswap_entry_cache_free(entry);
  1090. reject:
  1091. if (objcg)
  1092. obj_cgroup_put(objcg);
  1093. return ret;
  1094. shrink:
  1095. pool = zswap_pool_last_get();
  1096. if (pool)
  1097. queue_work(shrink_wq, &pool->shrink_work);
  1098. ret = -ENOMEM;
  1099. goto reject;
  1100. }
  1101. /*
  1102. * returns 0 if the page was successfully decompressed
  1103. * return -1 on entry not found or error
  1104. */
  1105. static int zswap_frontswap_load(unsigned type, pgoff_t offset,
  1106. struct page *page)
  1107. {
  1108. struct zswap_tree *tree = zswap_trees[type];
  1109. struct zswap_entry *entry;
  1110. struct scatterlist input, output;
  1111. struct crypto_acomp_ctx *acomp_ctx;
  1112. u8 *src, *dst, *tmp;
  1113. unsigned int dlen;
  1114. int ret;
  1115. /* find */
  1116. spin_lock(&tree->lock);
  1117. entry = zswap_entry_find_get(&tree->rbroot, offset);
  1118. if (!entry) {
  1119. /* entry was written back */
  1120. spin_unlock(&tree->lock);
  1121. return -1;
  1122. }
  1123. spin_unlock(&tree->lock);
  1124. if (!entry->length) {
  1125. dst = kmap_atomic(page);
  1126. zswap_fill_page(dst, entry->value);
  1127. kunmap_atomic(dst);
  1128. ret = 0;
  1129. goto stats;
  1130. }
  1131. if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
  1132. tmp = kmalloc(entry->length, GFP_ATOMIC);
  1133. if (!tmp) {
  1134. ret = -ENOMEM;
  1135. goto freeentry;
  1136. }
  1137. }
  1138. /* decompress */
  1139. dlen = PAGE_SIZE;
  1140. src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
  1141. if (zpool_evictable(entry->pool->zpool))
  1142. src += sizeof(struct zswap_header);
  1143. if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
  1144. memcpy(tmp, src, entry->length);
  1145. src = tmp;
  1146. zpool_unmap_handle(entry->pool->zpool, entry->handle);
  1147. }
  1148. acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
  1149. mutex_lock(acomp_ctx->mutex);
  1150. sg_init_one(&input, src, entry->length);
  1151. sg_init_table(&output, 1);
  1152. sg_set_page(&output, page, PAGE_SIZE, 0);
  1153. acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
  1154. ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
  1155. mutex_unlock(acomp_ctx->mutex);
  1156. if (zpool_can_sleep_mapped(entry->pool->zpool))
  1157. zpool_unmap_handle(entry->pool->zpool, entry->handle);
  1158. else
  1159. kfree(tmp);
  1160. BUG_ON(ret);
  1161. stats:
  1162. count_vm_event(ZSWPIN);
  1163. if (entry->objcg)
  1164. count_objcg_event(entry->objcg, ZSWPIN);
  1165. freeentry:
  1166. spin_lock(&tree->lock);
  1167. zswap_entry_put(tree, entry);
  1168. spin_unlock(&tree->lock);
  1169. return ret;
  1170. }
  1171. /* frees an entry in zswap */
  1172. static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
  1173. {
  1174. struct zswap_tree *tree = zswap_trees[type];
  1175. struct zswap_entry *entry;
  1176. /* find */
  1177. spin_lock(&tree->lock);
  1178. entry = zswap_rb_search(&tree->rbroot, offset);
  1179. if (!entry) {
  1180. /* entry was written back */
  1181. spin_unlock(&tree->lock);
  1182. return;
  1183. }
  1184. /* remove from rbtree */
  1185. zswap_rb_erase(&tree->rbroot, entry);
  1186. /* drop the initial reference from entry creation */
  1187. zswap_entry_put(tree, entry);
  1188. spin_unlock(&tree->lock);
  1189. }
  1190. /* frees all zswap entries for the given swap type */
  1191. static void zswap_frontswap_invalidate_area(unsigned type)
  1192. {
  1193. struct zswap_tree *tree = zswap_trees[type];
  1194. struct zswap_entry *entry, *n;
  1195. if (!tree)
  1196. return;
  1197. /* walk the tree and free everything */
  1198. spin_lock(&tree->lock);
  1199. rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
  1200. zswap_free_entry(entry);
  1201. tree->rbroot = RB_ROOT;
  1202. spin_unlock(&tree->lock);
  1203. kfree(tree);
  1204. zswap_trees[type] = NULL;
  1205. }
  1206. static void zswap_frontswap_init(unsigned type)
  1207. {
  1208. struct zswap_tree *tree;
  1209. tree = kzalloc(sizeof(*tree), GFP_KERNEL);
  1210. if (!tree) {
  1211. pr_err("alloc failed, zswap disabled for swap type %d\n", type);
  1212. return;
  1213. }
  1214. tree->rbroot = RB_ROOT;
  1215. spin_lock_init(&tree->lock);
  1216. zswap_trees[type] = tree;
  1217. }
  1218. static const struct frontswap_ops zswap_frontswap_ops = {
  1219. .store = zswap_frontswap_store,
  1220. .load = zswap_frontswap_load,
  1221. .invalidate_page = zswap_frontswap_invalidate_page,
  1222. .invalidate_area = zswap_frontswap_invalidate_area,
  1223. .init = zswap_frontswap_init
  1224. };
  1225. /*********************************
  1226. * debugfs functions
  1227. **********************************/
  1228. #ifdef CONFIG_DEBUG_FS
  1229. #include <linux/debugfs.h>
  1230. static struct dentry *zswap_debugfs_root;
  1231. static int __init zswap_debugfs_init(void)
  1232. {
  1233. if (!debugfs_initialized())
  1234. return -ENODEV;
  1235. zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
  1236. debugfs_create_u64("pool_limit_hit", 0444,
  1237. zswap_debugfs_root, &zswap_pool_limit_hit);
  1238. debugfs_create_u64("reject_reclaim_fail", 0444,
  1239. zswap_debugfs_root, &zswap_reject_reclaim_fail);
  1240. debugfs_create_u64("reject_alloc_fail", 0444,
  1241. zswap_debugfs_root, &zswap_reject_alloc_fail);
  1242. debugfs_create_u64("reject_kmemcache_fail", 0444,
  1243. zswap_debugfs_root, &zswap_reject_kmemcache_fail);
  1244. debugfs_create_u64("reject_compress_poor", 0444,
  1245. zswap_debugfs_root, &zswap_reject_compress_poor);
  1246. debugfs_create_u64("written_back_pages", 0444,
  1247. zswap_debugfs_root, &zswap_written_back_pages);
  1248. debugfs_create_u64("duplicate_entry", 0444,
  1249. zswap_debugfs_root, &zswap_duplicate_entry);
  1250. debugfs_create_u64("pool_total_size", 0444,
  1251. zswap_debugfs_root, &zswap_pool_total_size);
  1252. debugfs_create_atomic_t("stored_pages", 0444,
  1253. zswap_debugfs_root, &zswap_stored_pages);
  1254. debugfs_create_atomic_t("same_filled_pages", 0444,
  1255. zswap_debugfs_root, &zswap_same_filled_pages);
  1256. return 0;
  1257. }
  1258. #else
  1259. static int __init zswap_debugfs_init(void)
  1260. {
  1261. return 0;
  1262. }
  1263. #endif
  1264. /*********************************
  1265. * module init and exit
  1266. **********************************/
  1267. static int __init init_zswap(void)
  1268. {
  1269. struct zswap_pool *pool;
  1270. int ret;
  1271. zswap_init_started = true;
  1272. if (zswap_entry_cache_create()) {
  1273. pr_err("entry cache creation failed\n");
  1274. goto cache_fail;
  1275. }
  1276. ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
  1277. zswap_dstmem_prepare, zswap_dstmem_dead);
  1278. if (ret) {
  1279. pr_err("dstmem alloc failed\n");
  1280. goto dstmem_fail;
  1281. }
  1282. ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
  1283. "mm/zswap_pool:prepare",
  1284. zswap_cpu_comp_prepare,
  1285. zswap_cpu_comp_dead);
  1286. if (ret)
  1287. goto hp_fail;
  1288. pool = __zswap_pool_create_fallback();
  1289. if (pool) {
  1290. pr_info("loaded using pool %s/%s\n", pool->tfm_name,
  1291. zpool_get_type(pool->zpool));
  1292. list_add(&pool->list, &zswap_pools);
  1293. zswap_has_pool = true;
  1294. } else {
  1295. pr_err("pool creation failed\n");
  1296. zswap_enabled = false;
  1297. }
  1298. shrink_wq = create_workqueue("zswap-shrink");
  1299. if (!shrink_wq)
  1300. goto fallback_fail;
  1301. ret = frontswap_register_ops(&zswap_frontswap_ops);
  1302. if (ret)
  1303. goto destroy_wq;
  1304. if (zswap_debugfs_init())
  1305. pr_warn("debugfs initialization failed\n");
  1306. return 0;
  1307. destroy_wq:
  1308. destroy_workqueue(shrink_wq);
  1309. fallback_fail:
  1310. if (pool)
  1311. zswap_pool_destroy(pool);
  1312. hp_fail:
  1313. cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
  1314. dstmem_fail:
  1315. zswap_entry_cache_destroy();
  1316. cache_fail:
  1317. /* if built-in, we aren't unloaded on failure; don't allow use */
  1318. zswap_init_failed = true;
  1319. zswap_enabled = false;
  1320. return -ENOMEM;
  1321. }
  1322. /* must be late so crypto has time to come up */
  1323. late_initcall(init_zswap);
  1324. MODULE_LICENSE("GPL");
  1325. MODULE_AUTHOR("Seth Jennings <[email protected]>");
  1326. MODULE_DESCRIPTION("Compressed cache for swap pages");