rbincache.c

/*
 * linux/mm/rbincache.c
 *
 * A cleancache backend for fast ION allocation.
 * Cache management methods based on rbincache.
 * Copyright (C) 2019 Samsung Electronics
 *
 * With rbincache, active file pages can be backed up in memory during page
 * reclaim. When their data is needed again, the I/O read is avoided. Up to
 * this point it might seem like a waste of time and resources just to copy
 * file pages into the cleancache area.
 *
 * However, since the cleancache API guarantees that cached pages are clean,
 * we can make use of clean file caches stored in one contiguous section.
 * By having two modes (1. cache_mode, 2. alloc_mode), rbincache normally
 * acts as a cleancache backend, while providing a rapidly reclaimable
 * contiguous space when needed.
 */

#include <linux/atomic.h>
#include <linux/cleancache.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/swap.h>

#include "rbinregion.h"

/*
 * rbincache: a cleancache API implementation
 *
 * When a file page is passed from cleancache to rbincache, rbincache maintains
 * a mapping of the <filesystem_type, inode_number, page_index> to the
 * rr_handle that represents the backed-up file page.
 * This mapping is achieved with a red-black tree per filesystem,
 * plus a radix tree per red-black tree node.
 *
 * An rbincache pool is assigned its pool_id when a filesystem is mounted.
 * Each rbincache pool has a red-black tree, where the inode number (rb_index)
 * is the search key. In other words, each rb_node represents an inode.
 * Each rb_node has a radix tree which uses page->index (ra_index) as the index.
 * Each radix tree slot points to an rr_handle.
 *
 * The implementation borrows many concepts from zcache.
 */
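
/*
 * Lookup path (illustrative):
 *
 *   rbincache.pools[pool_id]        one rc_pool per mounted filesystem
 *     -> rc_pool->rbtree            keyed by inode number (rb_index)
 *       -> rc_rbnode->ratree        keyed by page->index (ra_index)
 *         -> struct rr_handle *     backing slot in the rbin region
 */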

/* statistics */
atomic_t rbin_allocated_pages = ATOMIC_INIT(0);
atomic_t rbin_cached_pages = ATOMIC_INIT(0);
atomic_t rbin_free_pages = ATOMIC_INIT(0);
atomic_t rbin_pool_pages = ATOMIC_INIT(0);
static atomic_t rbin_zero_pages = ATOMIC_INIT(0);
static atomic_t rc_num_rbnode = ATOMIC_INIT(0);
static atomic_t rc_num_ra_entry = ATOMIC_INIT(0);
static atomic_t rc_num_dup_handle = ATOMIC_INIT(0);
static atomic_t rc_num_init_fs = ATOMIC_INIT(0);
static atomic_t rc_num_init_shared_fs = ATOMIC_INIT(0);
static atomic_t rc_num_gets = ATOMIC_INIT(0);
static atomic_t rc_num_puts = ATOMIC_INIT(0);
static atomic_t rc_num_flush_page = ATOMIC_INIT(0);
static atomic_t rc_num_flush_inode = ATOMIC_INIT(0);
static atomic_t rc_num_flush_fs = ATOMIC_INIT(0);
static atomic_t rc_num_succ_init_fs = ATOMIC_INIT(0);
static atomic_t rc_num_succ_gets = ATOMIC_INIT(0);
static atomic_t rc_num_succ_puts = ATOMIC_INIT(0);
static atomic_t rc_num_succ_flush_page = ATOMIC_INIT(0);
static atomic_t rc_num_succ_flush_inode = ATOMIC_INIT(0);
static atomic_t rc_num_succ_flush_fs = ATOMIC_INIT(0);
/* statistics end */

/* rbincache data structures */
#define MAX_RC_POOLS 64
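/*
 * Sentinel handle marking an all-zero page; such pages take no space in the
 * rbin region and are reconstructed with memset() on load.
 */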
#define ZERO_HANDLE ((void *)~(~0UL >> 1))

/* One rbincache pool per filesystem mount instance */
struct rc_pool {
        struct rb_root rbtree;
        rwlock_t rb_lock;               /* Protects rbtree */
};

/* Manage all rbincache pools */
struct rbincache {
        struct rc_pool *pools[MAX_RC_POOLS];
        u32 num_pools;                  /* current number of rbincache pools */
        spinlock_t pool_lock;           /* Protects pools[] and num_pools */
};

struct rbincache rbincache;

/*
 * Red-black tree node; each node has a page-index radix tree.
 * Indexed by inode number.
 */
struct rc_rbnode {
        struct rb_node rb_node;
        int rb_index;
        struct radix_tree_root ratree;  /* Page radix tree per inode rbtree */
        spinlock_t ra_lock;             /* Protects radix tree */
        struct kref refcount;
};
/* rbincache data structures end */

/* rbincache slab allocation */
static struct kmem_cache *rc_rbnode_cache;

static int rc_rbnode_cache_create(void)
{
        rc_rbnode_cache = KMEM_CACHE(rc_rbnode, 0);
        return rc_rbnode_cache == NULL;
}

static void rc_rbnode_cache_destroy(void)
{
        kmem_cache_destroy(rc_rbnode_cache);
}
/* rbincache slab allocation end */

/* rbincache rb_tree & ra_tree helper functions */

/**
 * radix_tree_gang_lookup_index - perform multiple lookup on a radix tree
 * @root: radix tree root
 * @results: where the results of the lookup are placed
 * @indices: where their indices should be placed
 * @first_index: start the lookup from this key
 * @max_items: place up to this many items at *results
 *
 * Performs an index-ascending scan of the tree for present items. Places
 * them at *@results and returns the number of items which were placed at
 * *@results. The indices are placed in @indices.
 *
 * The implementation is naive.
 *
 * The only difference from radix_tree_gang_lookup() is that the indices are
 * collected along with the results of the lookup.
 */
static unsigned int
radix_tree_gang_lookup_index(const struct radix_tree_root *root, void **results,
                             unsigned long *indices, unsigned long first_index,
                             unsigned int max_items)
{
        struct radix_tree_iter iter;
        void **slot;
        unsigned int ret = 0;

        if (unlikely(!max_items))
                return 0;

        radix_tree_for_each_slot(slot, root, &iter, first_index) {
                results[ret] = rcu_dereference_raw(*slot);
                if (!results[ret])
                        continue;
                if (radix_tree_is_internal_node(results[ret])) {
                        slot = radix_tree_iter_retry(&iter);
                        continue;
                }
                if (indices)
                        indices[ret] = iter.index;
                if (++ret == max_items)
                        break;
        }
        return ret;
}

/*
 * Look up the rc_rbnode with the given inode number (index) in the rbtree.
 * If it is not found, return NULL and fill @rb_parent/@rb_link so that the
 * caller can insert a new node at the right position.
 *
 * The caller should hold rb_lock.
 */
static struct rc_rbnode *rc_find_rbnode(struct rb_root *rbtree, int index,
                                        struct rb_node **rb_parent,
                                        struct rb_node ***rb_link)
{
        struct rc_rbnode *entry;
        struct rb_node **__rb_link, *__rb_parent, *rb_prev;

        __rb_link = &rbtree->rb_node;
        rb_prev = __rb_parent = NULL;

        while (*__rb_link) {
                __rb_parent = *__rb_link;
                entry = rb_entry(__rb_parent, struct rc_rbnode, rb_node);
                if (entry->rb_index > index)
                        __rb_link = &__rb_parent->rb_left;
                else if (entry->rb_index < index) {
                        rb_prev = __rb_parent;
                        __rb_link = &__rb_parent->rb_right;
                } else
                        return entry;
        }

        if (rb_parent)
                *rb_parent = __rb_parent;
        if (rb_link)
                *rb_link = __rb_link;
        return NULL;
}

static struct rc_rbnode *rc_find_get_rbnode(struct rc_pool *rcpool,
                                            int rb_index)
{
        unsigned long flags;
        struct rc_rbnode *rbnode;

        read_lock_irqsave(&rcpool->rb_lock, flags);
        rbnode = rc_find_rbnode(&rcpool->rbtree, rb_index, NULL, NULL);
        if (rbnode)
                kref_get(&rbnode->refcount);
        read_unlock_irqrestore(&rcpool->rb_lock, flags);
        return rbnode;
}

/*
 * kref_put callback for rc_rbnode.
 * The rbnode must have been isolated from rbtree already.
 * Called when rbnode has 0 references.
 */
static void rc_rbnode_release(struct kref *kref)
{
        struct rc_rbnode *rbnode;

        rbnode = container_of(kref, struct rc_rbnode, refcount);
        BUG_ON(rbnode->ratree.xa_head);
        kmem_cache_free(rc_rbnode_cache, rbnode);
        atomic_dec(&rc_num_rbnode);
}

/*
 * Check whether the radix tree of this rbnode is empty.
 * If that's true, then we can delete this rc_rbnode from
 * rc_pool->rbtree.
 *
 * Caller must hold rc_rbnode->ra_lock.
 */
static inline int rc_rbnode_empty(struct rc_rbnode *rbnode)
{
        return rbnode->ratree.xa_head == NULL;
}

/*
 * Remove rc_rbnode from rcpool->rbtree.
 */
static void rc_rbnode_isolate(struct rc_pool *rcpool, struct rc_rbnode *rbnode)
{
        /*
         * Someone may have taken a reference on this rbnode before we
         * acquired the write lock.
         * We only want to remove it from rcpool->rbtree when the caller and
         * the corresponding ratree are the sole holders of a reference to
         * this rbnode.
         * The check below ensures that a racing rc put will not end up adding
         * a page to an isolated node and thereby losing that memory.
         */
        if (atomic_read(&rbnode->refcount.refcount.refs) == 2) {
                rb_erase(&rbnode->rb_node, &rcpool->rbtree);
                RB_CLEAR_NODE(&rbnode->rb_node);
                kref_put(&rbnode->refcount, rc_rbnode_release);
        } else {
                pr_err("rbincache: unable to erase rbnode: refcount=%d\n",
                       atomic_read(&rbnode->refcount.refcount.refs));
        }
}
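
/*
 * Associate @handle with the <pool_id, rb_index, ra_index> key, creating the
 * per-inode rc_rbnode on demand. An existing entry at the same index is
 * dropped (and its region cache flushed) before the new one is inserted.
 * Returns 0 on success or a negative errno on failure.
 */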
static int rc_store_handle(int pool_id, int rb_index, int ra_index, void *handle)
{
        unsigned long flags;
        struct rc_pool *rcpool;
        struct rc_rbnode *rbnode, *dup_rbnode;
        struct rb_node **link = NULL, *parent = NULL;
        int ret = 0;
        void *dup_handle;

        rcpool = rbincache.pools[pool_id];
        rbnode = rc_find_get_rbnode(rcpool, rb_index);
        if (!rbnode) {
                /* create, init, and get (inc refcount) a new rbnode */
                rbnode = kmem_cache_alloc(rc_rbnode_cache, 0);
                if (!rbnode)
                        return -ENOMEM;
                atomic_inc(&rc_num_rbnode);

                INIT_RADIX_TREE(&rbnode->ratree, GFP_ATOMIC | __GFP_NOWARN);
                spin_lock_init(&rbnode->ra_lock);
                rbnode->rb_index = rb_index;
                kref_init(&rbnode->refcount);
                RB_CLEAR_NODE(&rbnode->rb_node);

                /* add that rbnode to the rbtree */
                write_lock_irqsave(&rcpool->rb_lock, flags);
                dup_rbnode = rc_find_rbnode(&rcpool->rbtree, rb_index,
                                            &parent, &link);
                if (dup_rbnode) {
                        /* somebody else allocated a new rbnode */
                        kmem_cache_free(rc_rbnode_cache, rbnode);
                        atomic_dec(&rc_num_rbnode);
                        rbnode = dup_rbnode;
                } else {
                        rb_link_node(&rbnode->rb_node, parent, link);
                        rb_insert_color(&rbnode->rb_node, &rcpool->rbtree);
                }
                /* Inc the reference of this rc_rbnode */
                kref_get(&rbnode->refcount);
                write_unlock_irqrestore(&rcpool->rb_lock, flags);
        }

        /* Successfully got an rc_rbnode when arriving here */
        spin_lock_irqsave(&rbnode->ra_lock, flags);
        dup_handle = radix_tree_delete(&rbnode->ratree, ra_index);
        if (unlikely(dup_handle)) {
                atomic_inc(&rc_num_dup_handle);
                if (dup_handle == ZERO_HANDLE)
                        atomic_dec(&rbin_zero_pages);
                else
                        region_flush_cache(dup_handle);
        }

        ret = radix_tree_insert(&rbnode->ratree, ra_index, (void *)handle);
        spin_unlock_irqrestore(&rbnode->ra_lock, flags);

        if (unlikely(ret)) {
                /* insert failed */
                write_lock_irqsave(&rcpool->rb_lock, flags);
                spin_lock(&rbnode->ra_lock);
                if (rc_rbnode_empty(rbnode))
                        rc_rbnode_isolate(rcpool, rbnode);
                spin_unlock(&rbnode->ra_lock);
                write_unlock_irqrestore(&rcpool->rb_lock, flags);
        } else {
                atomic_inc(&rc_num_ra_entry);
        }

        kref_put(&rbnode->refcount, rc_rbnode_release);
        return ret;
}

/*
 * Load handle and delete it from radix tree.
 * If the radix tree of the corresponding rbnode is empty, delete the rbnode
 * from rcpool->rbtree also.
 */
static struct rr_handle *rc_load_del_handle(int pool_id,
                                            int rb_index, int ra_index)
{
        struct rc_pool *rcpool;
        struct rc_rbnode *rbnode;
        struct rr_handle *handle = NULL;
        unsigned long flags;

        rcpool = rbincache.pools[pool_id];
        rbnode = rc_find_get_rbnode(rcpool, rb_index);
        if (!rbnode)
                goto out;

        BUG_ON(rbnode->rb_index != rb_index);

        spin_lock_irqsave(&rbnode->ra_lock, flags);
        handle = radix_tree_delete(&rbnode->ratree, ra_index);
        spin_unlock_irqrestore(&rbnode->ra_lock, flags);
        if (!handle)
                goto no_handle;
        else if (handle == ZERO_HANDLE)
                atomic_dec(&rbin_zero_pages);
        atomic_dec(&rc_num_ra_entry);

        /* rb_lock and ra_lock must be taken again in the given sequence */
        write_lock_irqsave(&rcpool->rb_lock, flags);
        spin_lock(&rbnode->ra_lock);
        if (rc_rbnode_empty(rbnode))
                rc_rbnode_isolate(rcpool, rbnode);
        spin_unlock(&rbnode->ra_lock);
        write_unlock_irqrestore(&rcpool->rb_lock, flags);

no_handle:
        kref_put(&rbnode->refcount, rc_rbnode_release);
out:
        return handle;
}

/* rbincache rb_tree & ra_tree helper functions end */

/* Cleancache API implementation start */
static bool is_zero_page(struct page *page)
{
        unsigned long *ptr = kmap_atomic(page);
        int i;
        bool ret = false;

        for (i = 0; i < PAGE_SIZE / sizeof(*ptr); i++) {
                if (ptr[i])
                        goto out;
        }
        ret = true;
out:
        kunmap_atomic(ptr);
        return ret;
}

/* refill_mode temporarily blocks kswapd from refilling rbincache */
enum refill_mode_state {
        ALLOW_REFILL,
        BLOCK_REFILL = 1
};
static int refill_mode = ALLOW_REFILL;
static unsigned long last_mode_change;
#define BLOCK_REFILL_MAX_TIME (5 * HZ) /* 5 sec */

inline bool is_refill_blocked(void)
{
        if (time_after(jiffies, last_mode_change + BLOCK_REFILL_MAX_TIME))
                refill_mode = ALLOW_REFILL;
        if (refill_mode >= BLOCK_REFILL)
                return true;
        else
                return false;
}

static ssize_t refill_mode_show(struct kobject *kobj,
                                struct kobj_attribute *attr, char *buf)
{
        if (time_after(jiffies, last_mode_change + BLOCK_REFILL_MAX_TIME))
                refill_mode = ALLOW_REFILL;
        return sprintf(buf, "%d\n", refill_mode);
}

static ssize_t refill_mode_store(struct kobject *kobj,
                                 struct kobj_attribute *attr,
                                 const char *buf, size_t count)
{
        int mode;
        int err;

        err = kstrtoint(buf, 10, &mode);
        if (err || mode > BLOCK_REFILL || mode < ALLOW_REFILL)
                return -EINVAL;

        refill_mode = mode;
        last_mode_change = jiffies;
        return count;
}
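
/*
 * Example (the path depends on where rbin_kobject is registered, e.g. it may
 * appear under /sys/kernel/rbin):
 *
 *   echo 1 > <rbin sysfs dir>/refill_mode   # block refill for up to 5 sec
 *   echo 0 > <rbin sysfs dir>/refill_mode   # allow refill again
 *   cat <rbin sysfs dir>/refill_mode
 */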

/*
 * Callback for cleancache_ops->put_page.
 * Though it might fail, it does not matter since cleancache does not
 * guarantee that stored pages are preserved.
 */
static void rc_store_page(int pool_id, struct cleancache_filekey key,
                          pgoff_t index, struct page *src)
{
        struct rr_handle *handle;
        int ret;
        bool zero;

        if (!current_is_kswapd())
                return;
        if (is_refill_blocked())
                return;

        atomic_inc(&rc_num_puts);

        zero = is_zero_page(src);
        if (zero) {
                handle = (struct rr_handle *)ZERO_HANDLE;
                goto out_zero;
        }

        handle = region_store_cache(src, pool_id, key.u.ino, index);
        if (!handle)
                return;
out_zero:
        ret = rc_store_handle(pool_id, key.u.ino, index, handle);
        if (ret) { /* failed */
                if (!zero)
                        region_flush_cache(handle);
                return;
        }

        /* update stats */
        atomic_inc(&rc_num_succ_puts);
        if (zero)
                atomic_inc(&rbin_zero_pages);
}

/*
 * function for cleancache_ops->get_page
 */
static int rc_load_page(int pool_id, struct cleancache_filekey key,
                        pgoff_t index, struct page *dst)
{
        struct rr_handle *handle;
        int ret = -EINVAL;
        void *addr;

        atomic_inc(&rc_num_gets);

        handle = rc_load_del_handle(pool_id, key.u.ino, index);
        if (!handle)
                goto out;

        if (handle == ZERO_HANDLE) {
                addr = kmap_atomic(dst);
                memset(addr, 0, PAGE_SIZE);
                kunmap_atomic(addr);
                flush_dcache_page(dst);
                ret = 0;
        } else {
                ret = region_load_cache(handle, dst, pool_id, key.u.ino, index);
        }
        if (ret)
                goto out;

        /* update stats */
        atomic_inc(&rc_num_succ_gets);
out:
        return ret;
}
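
/*
 * Callback for cleancache_ops->invalidate_page: drop a single cached page.
 */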
static void rc_flush_page(int pool_id, struct cleancache_filekey key,
                          pgoff_t index)
{
        struct rr_handle *handle;

        atomic_inc(&rc_num_flush_page);

        handle = rc_load_del_handle(pool_id, key.u.ino, index);
        if (!handle)
                return;

        if (handle != ZERO_HANDLE)
                region_flush_cache(handle);

        atomic_inc(&rc_num_succ_flush_page);
}

#define FREE_BATCH 16
/*
 * Flush all entries in the radix tree of this rbnode.
 * The caller must hold rbnode->ra_lock.
 */
static int rc_flush_ratree(struct rc_pool *rcpool, struct rc_rbnode *rbnode)
{
        unsigned long index = 0;
        int count, i, total_count = 0;
        struct rr_handle *handle;
        void *results[FREE_BATCH];
        unsigned long indices[FREE_BATCH];

        do {
                count = radix_tree_gang_lookup_index(&rbnode->ratree,
                                (void **)results, indices, index, FREE_BATCH);
                for (i = 0; i < count; i++) {
                        if (results[i] == ZERO_HANDLE) {
                                handle = radix_tree_delete(&rbnode->ratree,
                                                           indices[i]);
                                if (handle)
                                        atomic_dec(&rbin_zero_pages);
                                continue;
                        }
                        handle = (struct rr_handle *)results[i];
                        index = handle->ra_index;
                        handle = radix_tree_delete(&rbnode->ratree, index);
                        if (!handle)
                                continue;
                        atomic_dec(&rc_num_ra_entry);
                        total_count++;
                        region_flush_cache(handle);
                }
                index++;
        } while (count == FREE_BATCH);
        return total_count;
}
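
/*
 * Callback for cleancache_ops->invalidate_inode: drop every page cached for
 * the given inode.
 */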
static void rc_flush_inode(int pool_id, struct cleancache_filekey key)
{
        struct rc_rbnode *rbnode;
        unsigned long flags1, flags2;
        struct rc_pool *rcpool = rbincache.pools[pool_id];
        int pages_flushed;

        atomic_inc(&rc_num_flush_inode);

        /*
         * Prevent new pages from being added to this rbnode, so take
         * rb_lock first.
         */
        write_lock_irqsave(&rcpool->rb_lock, flags1);
        rbnode = rc_find_rbnode(&rcpool->rbtree, key.u.ino, NULL, NULL);
        if (!rbnode) {
                write_unlock_irqrestore(&rcpool->rb_lock, flags1);
                return;
        }
        kref_get(&rbnode->refcount);

        spin_lock_irqsave(&rbnode->ra_lock, flags2);
        pages_flushed = rc_flush_ratree(rcpool, rbnode);
        if (rc_rbnode_empty(rbnode))
                /* When we arrive here, we already hold rb_lock */
                rc_rbnode_isolate(rcpool, rbnode);
        spin_unlock_irqrestore(&rbnode->ra_lock, flags2);

        kref_put(&rbnode->refcount, rc_rbnode_release);
        write_unlock_irqrestore(&rcpool->rb_lock, flags1);

        atomic_inc(&rc_num_succ_flush_inode);
}
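
/*
 * Callback for cleancache_ops->invalidate_fs: drop everything cached for the
 * pool, typically when the filesystem is unmounted.
 */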
static void rc_flush_fs(int pool_id)
{
        struct rc_rbnode *rbnode = NULL;
        struct rb_node *node;
        unsigned long flags1, flags2;
        struct rc_pool *rcpool;
        int pages_flushed = 0;

        atomic_inc(&rc_num_flush_fs);

        if (pool_id < 0)
                return;
        rcpool = rbincache.pools[pool_id];
        if (!rcpool)
                return;

        /*
         * Prevent new pages from being added, so take rb_lock first.
         */
        write_lock_irqsave(&rcpool->rb_lock, flags1);
        node = rb_first(&rcpool->rbtree);
        while (node) {
                rbnode = rb_entry(node, struct rc_rbnode, rb_node);
                node = rb_next(node);
                if (rbnode) {
                        kref_get(&rbnode->refcount);
                        spin_lock_irqsave(&rbnode->ra_lock, flags2);
                        pages_flushed += rc_flush_ratree(rcpool, rbnode);
                        if (rc_rbnode_empty(rbnode))
                                rc_rbnode_isolate(rcpool, rbnode);
                        spin_unlock_irqrestore(&rbnode->ra_lock, flags2);
                        kref_put(&rbnode->refcount, rc_rbnode_release);
                }
        }
        write_unlock_irqrestore(&rcpool->rb_lock, flags1);

        atomic_inc(&rc_num_succ_flush_fs);
}

static int rc_init_fs(size_t pagesize)
{
        struct rc_pool *rcpool;
        int ret = -1;

        /* init does not check rbincache_disabled; continue even when disabled */
        atomic_inc(&rc_num_init_fs);

        if (pagesize != PAGE_SIZE) {
                pr_warn("Unsupported page size: %zu\n", pagesize);
                ret = -EINVAL;
                goto out;
        }

        rcpool = kzalloc(sizeof(*rcpool), GFP_KERNEL);
        if (!rcpool) {
                ret = -ENOMEM;
                goto out;
        }

        spin_lock(&rbincache.pool_lock);
        if (rbincache.num_pools == MAX_RC_POOLS) {
                pr_err("Cannot create new pool (limit:%u)\n", MAX_RC_POOLS);
                ret = -EPERM;
                goto out_unlock;
        }

        rwlock_init(&rcpool->rb_lock);
        rcpool->rbtree = RB_ROOT;

        /* Add to pool list */
        for (ret = 0; ret < MAX_RC_POOLS; ret++)
                if (!rbincache.pools[ret])
                        break;
        if (ret == MAX_RC_POOLS) {
                ret = -ENOMEM;
                goto out_unlock;
        }
        rbincache.pools[ret] = rcpool;
        rbincache.num_pools++;
        pr_info("New pool created id:%d\n", ret);

        atomic_inc(&rc_num_succ_init_fs);
out_unlock:
        spin_unlock(&rbincache.pool_lock);
        if (ret < 0)
                kfree(rcpool);
out:
        return ret;
}

static int rc_init_shared_fs(uuid_t *uuid, size_t pagesize)
{
        atomic_inc(&rc_num_init_shared_fs);
        return rc_init_fs(pagesize);
}

static const struct cleancache_ops rbincache_ops = {
        .init_fs = rc_init_fs,
        .init_shared_fs = rc_init_shared_fs,
        .get_page = rc_load_page,
        .put_page = rc_store_page,
        .invalidate_page = rc_flush_page,
        .invalidate_inode = rc_flush_inode,
        .invalidate_fs = rc_flush_fs,
};
/* Cleancache API implementation end */
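
/*
 * Eviction callback from the rbin region: a cached entry is being reclaimed
 * (e.g. when the region is handed out for allocation), so drop our mapping
 * for it if one still exists.
 */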
static void rc_evict_cb(unsigned long raw_handle)
{
        struct rr_handle *h = (struct rr_handle *)raw_handle;

        /* try find and remove entry if exists */
        rc_load_del_handle(h->pool_id, h->rb_index, h->ra_index);
}

static struct region_ops rc_region_ops = {
        .evict = rc_evict_cb
};

/* Statistics */
static ssize_t stats_show(struct kobject *kobj,
                          struct kobj_attribute *attr, char *buf)
{
        return sprintf(buf,
                "allocated_pages : %8d\n"
                "cached_pages : %8d\n"
                "pool_pages : %8d\n"
                "zero_pages : %8d\n"
                "num_dup_handle : %8d\n"
                "init_fs : %8d\n"
                "init_shared_fs : %8d\n"
                "puts : %8d\n"
                "gets : %8d\n"
                "flush_page : %8d\n"
                "flush_inode : %8d\n"
                "flush_fs : %8d\n"
                "succ_init_fs : %8d\n"
                "succ_puts : %8d\n"
                "succ_gets : %8d\n"
                "succ_flush_page : %8d\n"
                "succ_flush_inode : %8d\n"
                "succ_flush_fs : %8d\n",
                atomic_read(&rbin_allocated_pages),
                atomic_read(&rbin_cached_pages),
                atomic_read(&rbin_pool_pages),
                atomic_read(&rbin_zero_pages),
                atomic_read(&rc_num_dup_handle),
                atomic_read(&rc_num_init_fs),
                atomic_read(&rc_num_init_shared_fs),
                atomic_read(&rc_num_puts),
                atomic_read(&rc_num_gets),
                atomic_read(&rc_num_flush_page),
                atomic_read(&rc_num_flush_inode),
                atomic_read(&rc_num_flush_fs),
                atomic_read(&rc_num_succ_init_fs),
                atomic_read(&rc_num_succ_puts),
                atomic_read(&rc_num_succ_gets),
                atomic_read(&rc_num_succ_flush_page),
                atomic_read(&rc_num_succ_flush_inode),
                atomic_read(&rc_num_succ_flush_fs));
}

/* mem_boost throttles only kswapd's behavior */
enum mem_boost {
        NO_BOOST,
        BOOST_MID = 1,
        BOOST_HIGH = 2,
        BOOST_KILL = 3,
};
static int mem_boost_mode = NO_BOOST;
static unsigned long last_mode_change;
#define MEM_BOOST_MAX_TIME (5 * HZ) /* 5 sec */

static ssize_t mem_boost_mode_show(struct kobject *kobj,
                                   struct kobj_attribute *attr, char *buf)
{
        if (time_after(jiffies, last_mode_change + MEM_BOOST_MAX_TIME))
                mem_boost_mode = NO_BOOST;
        return sprintf(buf, "%d\n", mem_boost_mode);
}

static ssize_t mem_boost_mode_store(struct kobject *kobj,
                                    struct kobj_attribute *attr,
                                    const char *buf, size_t count)
{
        int mode;
        int err;

        err = kstrtoint(buf, 10, &mode);
        if (err || mode > BOOST_KILL || mode < NO_BOOST)
                return -EINVAL;

        mem_boost_mode = mode;
        last_mode_change = jiffies;
        if (mem_boost_mode >= BOOST_HIGH)
                wake_dmabuf_rbin_heap_prereclaim();
        return count;
}
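
/*
 * Example (the path depends on where rbin_kobject is registered, e.g. it may
 * appear under /sys/kernel/rbin):
 *
 *   echo 2 > <rbin sysfs dir>/mem_boost_mode   # BOOST_HIGH also triggers
 *                                              # dmabuf rbin heap prereclaim
 *   cat <rbin sysfs dir>/mem_boost_mode        # reverts to 0 after 5 sec
 */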

static struct kobj_attribute stats_attr = __ATTR_RO(stats);
static struct kobj_attribute mem_boost_mode_attr = __ATTR_RW(mem_boost_mode);
static struct kobj_attribute refill_mode_attr = __ATTR_RW(refill_mode);

static struct attribute *rbincache_attrs[] = {
        &stats_attr.attr,
        &mem_boost_mode_attr.attr,
        &refill_mode_attr.attr,
        NULL,
};
ATTRIBUTE_GROUPS(rbincache);
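
/*
 * init_rbincache - set up the rbin region and register the cleancache backend
 * @pfn: start page frame number of the memory backing the rbin region
 * @nr_pages: number of pages in that region
 *
 * Registers rbincache_ops with cleancache and exposes the stats/mode
 * attributes under rbin_kobject (created elsewhere). A sysfs failure is
 * reported but not treated as fatal.
 */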
int init_rbincache(unsigned long pfn, unsigned long nr_pages)
{
        int err = 0;

        init_region(pfn, nr_pages, &rc_region_ops);

        err = rc_rbnode_cache_create();
        if (err) {
                pr_err("entry cache creation failed\n");
                goto error;
        }

        spin_lock_init(&rbincache.pool_lock);

        err = cleancache_register_ops(&rbincache_ops);
        if (err) {
                pr_err("failed to register cleancache_ops: %d\n", err);
                goto register_fail;
        }

        err = sysfs_create_groups(rbin_kobject, rbincache_groups);
        if (err) {
                kobject_put(rbin_kobject);
                pr_warn("sysfs initialization failed\n");
        }

        pr_info("cleancache enabled for rbin cleancache\n");
        return 0;

register_fail:
        rc_rbnode_cache_destroy();
error:
        return err;
}

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RBIN Cleancache");