extent_cache.c

// SPDX-License-Identifier: GPL-2.0
/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <[email protected]>
 *          Chao Yu <[email protected]>
 *
 * block_age-based extent cache added by:
 * Copyright (c) 2022 xiaomi Co., Ltd.
 *             http://www.xiaomi.com/
 */

#include <linux/fs.h>
#include <linux/f2fs_fs.h>

#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>
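
/*
 * Validate the cached largest read extent against the on-disk layout: if the
 * checkpoint is corrupted the extent is simply dropped, and if either end of
 * the extent maps to an invalid block address the caller is told to run fsck.
 */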
bool sanity_check_extent_cache(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct extent_tree *et = fi->extent_tree[EX_READ];
	struct extent_info *ei;

	if (!et)
		return true;

	ei = &et->largest;
	if (!ei->len)
		return true;

	/* Let's drop, if checkpoint got corrupted. */
	if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) {
		ei->len = 0;
		et->largest_updated = true;
		return true;
	}

	if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
	    !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
					DATA_GENERIC_ENHANCE)) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
			  __func__, inode->i_ino,
			  ei->blk, ei->fofs, ei->len);
		return false;
	}

	return true;
}
static void __set_extent_info(struct extent_info *ei,
				unsigned int fofs, unsigned int len,
				block_t blk, bool keep_clen,
				unsigned long age, unsigned long last_blocks,
				enum extent_type type)
{
	ei->fofs = fofs;
	ei->len = len;

	if (type == EX_READ) {
		ei->blk = blk;
		if (keep_clen)
			return;
#ifdef CONFIG_F2FS_FS_COMPRESSION
		ei->c_len = 0;
#endif
	} else if (type == EX_BLOCK_AGE) {
		ei->age = age;
		ei->last_blocks = last_blocks;
	}
}
static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
{
	if (type == EX_READ)
		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
			S_ISREG(inode->i_mode);
	if (type == EX_BLOCK_AGE)
		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
	return false;
}
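
/*
 * On top of the mount-time checks above, reject inodes whose runtime state
 * rules out caching: no extent tree while the shrinker is unregistered, no
 * read extents for FI_NO_EXTENT or (writable) compressed files, and no
 * block-age extents for compressed or cold files.
 */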
static bool __may_extent_tree(struct inode *inode, enum extent_type type)
{
	/*
	 * for recovered files during mount do not create extents
	 * if shrinker is not registered.
	 */
	if (list_empty(&F2FS_I_SB(inode)->s_list))
		return false;

	if (!__init_may_extent_tree(inode, type))
		return false;

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT))
			return false;
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
				 !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
			return false;
	} else if (type == EX_BLOCK_AGE) {
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
			return false;
		if (file_is_cold(inode))
			return false;
	}
	return true;
}
static void __try_update_largest_extent(struct extent_tree *et,
						struct extent_node *en)
{
	if (et->type != EX_READ)
		return;
	if (en->ei.len <= et->largest.len)
		return;

	et->largest = en->ei;
	et->largest_updated = true;
}
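
/*
 * Merge policy: two extents merge only when logically adjacent. Read extents
 * must also be physically contiguous on disk, and an extent tagged with a
 * compressed-cluster length merges only while its length still equals that
 * cluster length. Block-age extents merge when both their ages and their
 * last_blocks counters differ by no more than SAME_AGE_REGION.
 */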
static bool __is_extent_mergeable(struct extent_info *back,
		struct extent_info *front, enum extent_type type)
{
	if (type == EX_READ) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (back->c_len && back->len != back->c_len)
			return false;
		if (front->c_len && front->len != front->c_len)
			return false;
#endif
		return (back->fofs + back->len == front->fofs &&
				back->blk + back->len == front->blk);
	} else if (type == EX_BLOCK_AGE) {
		return (back->fofs + back->len == front->fofs &&
			abs(back->age - front->age) <= SAME_AGE_REGION &&
			abs(back->last_blocks - front->last_blocks) <=
						SAME_AGE_REGION);
	}
	return false;
}

static bool __is_back_mergeable(struct extent_info *cur,
		struct extent_info *back, enum extent_type type)
{
	return __is_extent_mergeable(back, cur, type);
}

static bool __is_front_mergeable(struct extent_info *cur,
		struct extent_info *front, enum extent_type type)
{
	return __is_extent_mergeable(cur, front, type);
}
static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
			struct extent_node *cached_en, unsigned int fofs)
{
	struct rb_node *node = root->rb_root.rb_node;
	struct extent_node *en;

	/* check a cached entry */
	if (cached_en && cached_en->ei.fofs <= fofs &&
			cached_en->ei.fofs + cached_en->ei.len > fofs)
		return cached_en;

	/* check rb_tree */
	while (node) {
		en = rb_entry(node, struct extent_node, rb_node);

		if (fofs < en->ei.fofs)
			node = node->rb_left;
		else if (fofs >= en->ei.fofs + en->ei.len)
			node = node->rb_right;
		else
			return en;
	}
	return NULL;
}
/*
 * lookup rb entry in position of @fofs in rb-tree,
 * if hit, return the entry, otherwise, return NULL
 * @prev_ex: extent before fofs
 * @next_ex: extent after fofs
 * @insert_p: insert point for new extent at fofs
 * in order to simplify the insertion after.
 * tree must stay unchanged between lookup and insertion.
 */
static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
				struct extent_node *cached_en,
				unsigned int fofs,
				struct extent_node **prev_entry,
				struct extent_node **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent,
				bool *leftmost)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct extent_node *en = cached_en;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
		goto lookup_neighbors;

	*leftmost = true;

	while (*pnode) {
		parent = *pnode;
		en = rb_entry(*pnode, struct extent_node, rb_node);

		if (fofs < en->ei.fofs) {
			pnode = &(*pnode)->rb_left;
		} else if (fofs >= en->ei.fofs + en->ei.len) {
			pnode = &(*pnode)->rb_right;
			*leftmost = false;
		} else {
			goto lookup_neighbors;
		}
	}

	*insert_p = pnode;
	*insert_parent = parent;

	en = rb_entry(parent, struct extent_node, rb_node);
	tmp_node = parent;
	if (parent && fofs > en->ei.fofs)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);

	tmp_node = parent;
	if (parent && fofs < en->ei.fofs)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
	return NULL;

lookup_neighbors:
	if (fofs == en->ei.fofs) {
		/* lookup prev node for merging backward later */
		tmp_node = rb_prev(&en->rb_node);
		*prev_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	if (fofs == en->ei.fofs + en->ei.len - 1) {
		/* lookup next node for merging frontward later */
		tmp_node = rb_next(&en->rb_node);
		*next_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	return en;
}
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;

static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en;

	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);
	en->et = et;

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
	atomic_inc(&et->node_cnt);
	atomic_inc(&eti->total_ext_node);
	return en;
}
static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	rb_erase_cached(&en->rb_node, &et->root);
	atomic_dec(&et->node_cnt);
	atomic_dec(&eti->total_ext_node);

	if (et->cached_en == en)
		et->cached_en = NULL;
	kmem_cache_free(extent_node_slab, en);
}

/*
 * Flow to release an extent_node:
 * 1. list_del_init
 * 2. __detach_extent_node
 * 3. kmem_cache_free.
 */
static void __release_extent_node(struct f2fs_sb_info *sbi,
			struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	spin_lock(&eti->extent_lock);
	f2fs_bug_on(sbi, list_empty(&en->list));
	list_del_init(&en->list);
	spin_unlock(&eti->extent_lock);

	__detach_extent_node(sbi, et, en);
}
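
/*
 * Return the inode's extent tree of the given type, creating it on first use
 * or reviving it from the zombie list if the inode was evicted earlier; the
 * tree itself is only freed in the evict_inode path.
 */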
static struct extent_tree *__grab_extent_tree(struct inode *inode,
						enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	mutex_lock(&eti->extent_tree_lock);
	et = radix_tree_lookup(&eti->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab,
					GFP_NOFS, true, NULL);
		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->type = type;
		et->root = RB_ROOT_CACHED;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		INIT_LIST_HEAD(&et->list);
		atomic_set(&et->node_cnt, 0);
		atomic_inc(&eti->total_ext_tree);
	} else {
		atomic_dec(&eti->total_zombie_tree);
		list_del_init(&et->list);
	}
	mutex_unlock(&eti->extent_tree_lock);

	/* never died until evict_inode */
	F2FS_I(inode)->extent_tree[type] = et;

	return et;
}
static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
					struct extent_tree *et)
{
	struct rb_node *node, *next;
	struct extent_node *en;
	unsigned int count = atomic_read(&et->node_cnt);

	node = rb_first_cached(&et->root);
	while (node) {
		next = rb_next(node);
		en = rb_entry(node, struct extent_node, rb_node);
		__release_extent_node(sbi, et, en);
		node = next;
	}

	return count - atomic_read(&et->node_cnt);
}

static void __drop_largest_extent(struct extent_tree *et,
					pgoff_t fofs, unsigned int len)
{
	if (fofs < et->largest.fofs + et->largest.len &&
			fofs + len > et->largest.fofs) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
}
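
/*
 * Called when an inode is loaded: seed the in-memory read extent tree from
 * the single extent stored in the on-disk inode (i_ext), or clear i_ext when
 * the inode may not use the read extent cache at all.
 */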
void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

	if (!__may_extent_tree(inode, EX_READ)) {
		/* drop largest read extent */
		if (i_ext && i_ext->len) {
			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
			i_ext->len = 0;
			set_page_dirty(ipage);
		}
		goto out;
	}

	et = __grab_extent_tree(inode, EX_READ);

	if (!i_ext || !i_ext->len)
		goto out;

	get_read_extent_info(&ei, i_ext);

	write_lock(&et->lock);
	if (atomic_read(&et->node_cnt))
		goto unlock_out;

	en = __attach_extent_node(sbi, et, &ei, NULL,
				&et->root.rb_root.rb_node, true);
	if (en) {
		et->largest = en->ei;
		et->cached_en = en;

		spin_lock(&eti->extent_lock);
		list_add_tail(&en->list, &eti->extent_list);
		spin_unlock(&eti->extent_lock);
	}
unlock_out:
	write_unlock(&et->lock);
out:
	if (!F2FS_I(inode)->extent_tree[EX_READ])
		set_inode_flag(inode, FI_NO_EXTENT);
}

void f2fs_init_age_extent_tree(struct inode *inode)
{
	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
		return;
	__grab_extent_tree(inode, EX_BLOCK_AGE);
}

void f2fs_init_extent_tree(struct inode *inode)
{
	/* initialize read cache */
	if (__init_may_extent_tree(inode, EX_READ))
		__grab_extent_tree(inode, EX_READ);

	/* initialize block age cache */
	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
		__grab_extent_tree(inode, EX_BLOCK_AGE);
}
static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
			struct extent_info *ei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en;
	bool ret = false;

	if (!et)
		return false;

	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);

	read_lock(&et->lock);

	if (type == EX_READ &&
			et->largest.fofs <= pgofs &&
			et->largest.fofs + et->largest.len > pgofs) {
		*ei = et->largest;
		ret = true;
		stat_inc_largest_node_hit(sbi);
		goto out;
	}

	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
	if (!en)
		goto out;

	if (en == et->cached_en)
		stat_inc_cached_node_hit(sbi, type);
	else
		stat_inc_rbtree_node_hit(sbi, type);

	*ei = en->ei;
	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	ret = true;
out:
	stat_inc_total_hit(sbi, type);
	read_unlock(&et->lock);

	if (type == EX_READ)
		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
	return ret;
}
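
/*
 * Try to extend a neighboring extent instead of inserting a new node: grow
 * the previous extent forward, grow the next extent backward, or, when the
 * new extent bridges both neighbors, fold everything into the next node and
 * release the previous one.
 */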
static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node *prev_ex,
				struct extent_node *next_ex)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en = NULL;

	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
		prev_ex->ei.len += ei->len;
		ei = &prev_ex->ei;
		en = prev_ex;
	}

	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
		next_ex->ei.fofs = ei->fofs;
		next_ex->ei.len += ei->len;
		if (et->type == EX_READ)
			next_ex->ei.blk = ei->blk;
		if (en)
			__release_extent_node(sbi, et, prev_ex);
		en = next_ex;
	}

	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	return en;
}
static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node **insert_p,
				struct rb_node *insert_parent,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct rb_node **p = &et->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_node *en = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	leftmost = true;

	/* look up extent_node in the rb tree */
	while (*p) {
		parent = *p;
		en = rb_entry(parent, struct extent_node, rb_node);

		if (ei->fofs < en->ei.fofs) {
			p = &(*p)->rb_left;
		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			f2fs_bug_on(sbi, 1);
		}
	}

do_insert:
	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	/* update in global extent list */
	spin_lock(&eti->extent_lock);
	list_add_tail(&en->list, &eti->extent_list);
	et->cached_en = en;
	spin_unlock(&eti->extent_lock);
	return en;
}
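
/*
 * Core update path: invalidate every cached extent overlapping
 * [fofs, fofs + len - 1], trimming or splitting nodes that straddle the
 * boundaries, then re-insert the new information. For example, updating
 * blocks [100, 103] inside a cached extent covering [0, 255] splits it into
 * [0, 99] and [104, 255]; for read extents, a leftover piece shorter than
 * F2FS_MIN_EXTENT_LEN is dropped rather than kept.
 */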
static void __update_extent_tree_range(struct inode *inode,
			struct extent_info *tei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int fofs = tei->fofs, len = tei->len;
	unsigned int end = fofs + len;
	bool updated = false;
	bool leftmost = false;

	if (!et)
		return;

	if (type == EX_READ)
		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
						tei->blk, 0);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
						tei->age, tei->last_blocks);

	write_lock(&et->lock);

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
			write_unlock(&et->lock);
			return;
		}

		prev = et->largest;
		dei.len = 0;

		/*
		 * drop largest extent before lookup, in case it's already
		 * been shrunk from extent tree
		 */
		__drop_largest_extent(et, fofs, len);
	}

	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (!en)
		en = next_en;

	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, fofs >= org_end);

		if (fofs > dei.fofs && (type != EX_READ ||
				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
			en->ei.len = fofs - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		if (end < org_end && (type != EX_READ ||
				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
			if (parts) {
				__set_extent_info(&ei,
					end, org_end - end,
					end - dei.fofs + dei.blk, false,
					dei.age, dei.last_blocks,
					type);
				en1 = __insert_extent_tree(sbi, et, &ei,
							NULL, NULL, true);
				next_en = en1;
			} else {
				__set_extent_info(&en->ei,
					end, en->ei.len - (end - dei.fofs),
					en->ei.blk + (end - dei.fofs), true,
					dei.age, dei.last_blocks,
					type);
				next_en = en;
			}
			parts++;
		}

		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = rb_entry_safe(node, struct extent_node,
						rb_node);
		}

		if (parts)
			__try_update_largest_extent(et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * if original extent is split into zero or two parts, extent
		 * tree has been altered by deletion or insertion, therefore
		 * invalidate pointers regard to tree.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	if (type == EX_BLOCK_AGE)
		goto update_age_extent_cache;

	/* 3. update extent in read extent cache */
	BUG_ON(type != EX_READ);

	if (tei->blk) {
		__set_extent_info(&ei, fofs, len, tei->blk, false,
				  0, 0, EX_READ);
		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);

		/* give up extent_cache, if split and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			et->largest_updated = true;
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__free_extent_tree(sbi, et);

	if (et->largest_updated) {
		et->largest_updated = false;
		updated = true;
	}
	goto out_read_extent_cache;
update_age_extent_cache:
	if (!tei->last_blocks)
		goto out_read_extent_cache;

	__set_extent_info(&ei, fofs, len, 0, false,
			tei->age, tei->last_blocks, EX_BLOCK_AGE);
	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);
out_read_extent_cache:
	write_unlock(&et->lock);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr, unsigned int llen,
				unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *en = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (en)
		goto unlock_out;

	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
	ei.c_len = c_len;

	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
unlock_out:
	write_unlock(&et->lock);
}
#endif
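
/*
 * Weighted average of block age, computed as
 * (new * (100 - weight) + old * weight) / 100 using divide-and-remainder
 * arithmetic so the 64-bit multiplications cannot overflow. For illustration
 * (assuming a weight of 30): new = 250, old = 150 gives
 * 2 * 70 + 1 * 30 + 50 * 70 / 100 + 50 * 30 / 100 = 220, which matches
 * (250 * 70 + 150 * 30) / 100.
 */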
static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
						unsigned long long new,
						unsigned long long old)
{
	unsigned int rem_old, rem_new;
	unsigned long long res;
	unsigned int weight = sbi->last_age_weight;

	res = div_u64_rem(new, 100, &rem_new) * (100 - weight) +
		div_u64_rem(old, 100, &rem_old) * weight;

	if (rem_new)
		res += rem_new * (100 - weight) / 100;
	if (rem_old)
		res += rem_old * weight / 100;

	return res;
}
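
/*
 * "Age" here is measured in data blocks allocated file-system-wide since the
 * extent was last touched, folded into the previous age by
 * __calculate_block_age(); the unsigned subtraction below also copes with
 * the allocated_data_blocks counter wrapping around.
 */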
/* This returns a new age and allocated blocks in ei */
static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
						block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t f_size = i_size_read(inode);
	unsigned long long cur_blocks =
				atomic64_read(&sbi->allocated_data_blocks);
	struct extent_info tei = *ei;	/* only fofs and len are valid */

	/*
	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
	 * file block even in seq write. So don't record age for newly last file
	 * block here.
	 */
	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
			blkaddr == NEW_ADDR)
		return -EINVAL;

	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
		unsigned long long cur_age;

		if (cur_blocks >= tei.last_blocks)
			cur_age = cur_blocks - tei.last_blocks;
		else
			/* allocated_data_blocks overflow */
			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;

		if (tei.age)
			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
		else
			ei->age = cur_age;
		ei->last_blocks = cur_blocks;
		WARN_ON(ei->age > cur_blocks);
		return 0;
	}

	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);

	/* the data block was allocated for the first time */
	if (blkaddr == NEW_ADDR)
		goto out;

	if (__is_valid_data_blkaddr(blkaddr) &&
	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
		f2fs_bug_on(sbi, 1);
		return -EINVAL;
	}
out:
	/*
	 * init block age with zero, this can happen when the block age extent
	 * was reclaimed due to memory constraint or system reboot
	 */
	ei->age = 0;
	ei->last_blocks = cur_blocks;
	return 0;
}
static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
{
	struct extent_info ei = {};

	if (!__may_extent_tree(dn->inode, type))
		return;

	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
								dn->ofs_in_node;
	ei.len = 1;

	if (type == EX_READ) {
		if (dn->data_blkaddr == NEW_ADDR)
			ei.blk = NULL_ADDR;
		else
			ei.blk = dn->data_blkaddr;
	} else if (type == EX_BLOCK_AGE) {
		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
			return;
	}
	__update_extent_tree_range(dn->inode, &ei, type);
}
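
/*
 * Memory shrinker: first tear down whole zombie trees (trees whose inode has
 * been evicted), then evict individual nodes from the head of the global LRU
 * list. Both phases use trylock so that reclaim never blocks on a contended
 * lock; a node whose tree lock is busy is just rotated to the list tail.
 */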
static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
				enum extent_type type)
{
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	if (!atomic_read(&eti->total_zombie_tree))
		goto free_node;

	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent tree */
	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et);
			write_unlock(&et->lock);
		}
		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
		list_del_init(&et->list);
		radix_tree_delete(&eti->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&eti->total_ext_tree);
		atomic_dec(&eti->total_zombie_tree);
		tree_cnt++;

		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
		cond_resched();
	}
	mutex_unlock(&eti->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&eti->extent_lock);
	for (; remained > 0; remained--) {
		if (list_empty(&eti->extent_list))
			break;
		en = list_first_entry(&eti->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in extent list */
			list_move_tail(&en->list, &eti->extent_list);
			continue;
		}

		list_del_init(&en->list);
		spin_unlock(&eti->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&eti->extent_lock);
	}
	spin_unlock(&eti->extent_lock);

unlock_out:
	mutex_unlock(&eti->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);

	return node_cnt + tree_cnt;
}
/* read extent cache operations */
bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_READ))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
}

bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
				block_t *blkaddr)
{
	struct extent_info ei = {};

	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
		return false;
	*blkaddr = ei.blk + index - ei.fofs;
	return true;
}
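
/*
 * A minimal caller sketch (hypothetical, for illustration only): on the read
 * path, a page index can be resolved to a block address without walking the
 * node tree, e.g.
 *
 *	block_t blkaddr;
 *
 *	if (f2fs_lookup_read_extent_cache_block(inode, index, &blkaddr))
 *		return blkaddr;		// cache hit, skip the dnode lookup
 */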
void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_READ);
}

void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, block_t blkaddr, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
		.blk = blkaddr,
	};

	if (!__may_extent_tree(dn->inode, EX_READ))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_READ);
}

unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
}

/* block age extent cache operations */
bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
	};

	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
}

unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
}
static unsigned int __destroy_extent_node(struct inode *inode,
					enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et || !atomic_read(&et->node_cnt))
		return 0;

	write_lock(&et->lock);
	node_cnt = __free_extent_tree(sbi, et);
	write_unlock(&et->lock);

	return node_cnt;
}

void f2fs_destroy_extent_node(struct inode *inode)
{
	__destroy_extent_node(inode, EX_READ);
	__destroy_extent_node(inode, EX_BLOCK_AGE);
}

static void __drop_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	bool updated = false;

	if (!__may_extent_tree(inode, type))
		return;

	write_lock(&et->lock);
	__free_extent_tree(sbi, et);
	if (type == EX_READ) {
		set_inode_flag(inode, FI_NO_EXTENT);
		if (et->largest.len) {
			et->largest.len = 0;
			updated = true;
		}
	}
	write_unlock(&et->lock);
	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

void f2fs_drop_extent_tree(struct inode *inode)
{
	__drop_extent_tree(inode, EX_READ);
	__drop_extent_tree(inode, EX_BLOCK_AGE);
}
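
/*
 * Called from the evict path. A still-linked, healthy inode keeps its
 * populated tree on the zombie list so the shrinker can reclaim it lazily
 * (or __grab_extent_tree() can revive it); otherwise the tree is torn down
 * and removed from the radix tree immediately.
 */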
static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et)
		return;

	if (inode->i_nlink && !is_bad_inode(inode) &&
					atomic_read(&et->node_cnt)) {
		mutex_lock(&eti->extent_tree_lock);
		list_add_tail(&et->list, &eti->zombie_list);
		atomic_inc(&eti->total_zombie_tree);
		mutex_unlock(&eti->extent_tree_lock);
		return;
	}

	/* free all extent info belong to this extent tree */
	node_cnt = __destroy_extent_node(inode, type);

	/* delete extent tree entry in radix tree */
	mutex_lock(&eti->extent_tree_lock);
	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	atomic_dec(&eti->total_ext_tree);
	mutex_unlock(&eti->extent_tree_lock);

	F2FS_I(inode)->extent_tree[type] = NULL;

	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
}

void f2fs_destroy_extent_tree(struct inode *inode)
{
	__destroy_extent_tree(inode, EX_READ);
	__destroy_extent_tree(inode, EX_BLOCK_AGE);
}
static void __init_extent_tree_info(struct extent_tree_info *eti)
{
	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
	mutex_init(&eti->extent_tree_lock);
	INIT_LIST_HEAD(&eti->extent_list);
	spin_lock_init(&eti->extent_lock);
	atomic_set(&eti->total_ext_tree, 0);
	INIT_LIST_HEAD(&eti->zombie_list);
	atomic_set(&eti->total_zombie_tree, 0);
	atomic_set(&eti->total_ext_node, 0);
}

void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);

	/* initialize for block age extents */
	atomic64_set(&sbi->allocated_data_blocks, 0);
	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
	sbi->last_age_weight = LAST_AGE_WEIGHT;
}
int __init f2fs_create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		return -ENOMEM;

	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab) {
		kmem_cache_destroy(extent_tree_slab);
		return -ENOMEM;
	}
	return 0;
}

void f2fs_destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}