// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"
#include "space-info.h"
#include "tree-mod-log.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;

/*
 * delayed back reference update tracking. For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing. This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */
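
/*
 * Rough shape of the data structures below (descriptive note, not from the
 * original file): every modified extent gets at most one
 * struct btrfs_delayed_ref_head, keyed by bytenr in delayed_refs->href_root.
 * The individual add/drop operations for that extent hang off the head in
 * head->ref_tree (and head->ref_add_list for the adds). At commit time, or
 * when a transaction handle decides to throttle, heads are picked with
 * btrfs_select_ref_head() and their accumulated mods are applied to the
 * extent allocation tree (that processing lives in extent-tree.c).
 */
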
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	bool ret = false;
	u64 reserved;

	spin_lock(&global_rsv->lock);
	reserved = global_rsv->reserved;
	spin_unlock(&global_rsv->lock);

	/*
	 * Since the global reserve is just kind of magic we don't really want
	 * to rely on it to save our bacon, so if our size is more than the
	 * delayed_refs_rsv and the global rsv then it's time to think about
	 * bailing.
	 */
	spin_lock(&delayed_refs_rsv->lock);
	reserved += delayed_refs_rsv->reserved;
	if (delayed_refs_rsv->size >= reserved)
		ret = true;
	spin_unlock(&delayed_refs_rsv->lock);
	return ret;
}

int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
{
	u64 num_entries =
		atomic_read(&trans->transaction->delayed_refs.num_entries);
	u64 avg_runtime;
	u64 val;

	smp_mb();
	avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
	val = num_entries * avg_runtime;
	if (val >= NSEC_PER_SEC)
		return 1;
	if (val >= NSEC_PER_SEC / 2)
		return 2;

	return btrfs_check_space_for_delayed_refs(trans->fs_info);
}

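/*
 * Note (added summary): the return values above encode how urgently the
 * caller should throttle. 1 means the estimated time to process everything
 * queued (num_entries * average runtime per ref) is at least one second,
 * 2 means at least half a second, and otherwise we only ask for throttling
 * when btrfs_check_space_for_delayed_refs() says the reservations look tight.
 */
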
/**
 * Release a ref head's reservation
 *
 * @fs_info: the filesystem
 * @nr: number of items to drop
 *
 * This drops the delayed ref head's count from the delayed refs rsv and frees
 * any excess reservation we had.
 */
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
	u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, nr);
	u64 released = 0;

	/*
	 * We have to check the mount option here because we could be enabling
	 * the free space tree for the first time and don't have the compat_ro
	 * option set yet.
	 *
	 * We need extra reservations if we have the free space tree because
	 * we'll have to modify that tree as well.
	 */
	if (btrfs_test_opt(fs_info, FREE_SPACE_TREE))
		num_bytes *= 2;

	released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
	if (released)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, released, 0);
}

/*
 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
 * @trans - the trans that may have generated delayed refs
 *
 * This is to be called anytime we may have adjusted trans->delayed_ref_updates;
 * it will calculate the additional size and add it to the delayed_refs_rsv.
 */
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
	u64 num_bytes;

	if (!trans->delayed_ref_updates)
		return;

	num_bytes = btrfs_calc_insert_metadata_size(fs_info,
						    trans->delayed_ref_updates);
	/*
	 * We have to check the mount option here because we could be enabling
	 * the free space tree for the first time and don't have the compat_ro
	 * option set yet.
	 *
	 * We need extra reservations if we have the free space tree because
	 * we'll have to modify that tree as well.
	 */
	if (btrfs_test_opt(fs_info, FREE_SPACE_TREE))
		num_bytes *= 2;

	spin_lock(&delayed_rsv->lock);
	delayed_rsv->size += num_bytes;
	delayed_rsv->full = false;
	spin_unlock(&delayed_rsv->lock);
	trans->delayed_ref_updates = 0;
}

/**
 * Transfer bytes to our delayed refs rsv
 *
 * @fs_info: the filesystem
 * @num_bytes: number of bytes to transfer
 *
 * This transfers up to the num_bytes amount, previously reserved, to the
 * delayed_refs_rsv. Any extra bytes are returned to the space info.
 */
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
				       u64 num_bytes)
{
	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	u64 to_free = 0;

	spin_lock(&delayed_refs_rsv->lock);
	if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
		u64 delta = delayed_refs_rsv->size -
			delayed_refs_rsv->reserved;
		if (num_bytes > delta) {
			to_free = num_bytes - delta;
			num_bytes = delta;
		}
	} else {
		to_free = num_bytes;
		num_bytes = 0;
	}

	if (num_bytes)
		delayed_refs_rsv->reserved += num_bytes;
	if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
		delayed_refs_rsv->full = true;
	spin_unlock(&delayed_refs_rsv->lock);

	if (num_bytes)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, num_bytes, 1);
	if (to_free)
		btrfs_space_info_free_bytes_may_use(fs_info,
				delayed_refs_rsv->space_info, to_free);
}

/**
 * Refill based on our delayed refs usage
 *
 * @fs_info: the filesystem
 * @flush: control how we can flush for this reservation.
 *
 * This will refill the delayed block_rsv up to one item's worth of space and
 * will return -ENOSPC if we can't make the reservation.
 */
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
				  enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
	u64 limit = btrfs_calc_insert_metadata_size(fs_info, 1);
	u64 num_bytes = 0;
	int ret = -ENOSPC;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved < block_rsv->size) {
		num_bytes = block_rsv->size - block_rsv->reserved;
		num_bytes = min(num_bytes, limit);
	}
	spin_unlock(&block_rsv->lock);

	if (!num_bytes)
		return 0;

	ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush);
	if (ret)
		return ret;
	btrfs_block_rsv_add_bytes(block_rsv, num_bytes, 0);
	trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
				      0, num_bytes, 1);
	return 0;
}

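/*
 * How the delayed_refs_rsv helpers above fit together (added summary):
 *
 *   btrfs_update_delayed_refs_rsv()     grows ->size by the metadata cost of
 *                                       trans->delayed_ref_updates items
 *                                       (doubled when the free space tree is
 *                                       enabled);
 *   btrfs_migrate_to_delayed_refs_rsv() moves already reserved bytes into
 *                                       ->reserved and returns any excess to
 *                                       the space info;
 *   btrfs_delayed_refs_rsv_refill()     tops the rsv up by at most one item's
 *                                       worth from the space info;
 *   btrfs_delayed_refs_rsv_release()    gives back the reservation for heads
 *                                       that have been processed.
 */
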
/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
			  struct btrfs_delayed_tree_ref *ref2)
{
	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
			  struct btrfs_delayed_data_ref *ref2)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

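/*
 * comp_refs() below defines the ordering used inside a head's ref_tree:
 * first by ref type, then by the per-type fields compared above (root or
 * parent for tree refs, root/objectid/offset or parent for data refs), and
 * finally by seq when check_seq is set.
 */
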
static int comp_refs(struct btrfs_delayed_ref_node *ref1,
		     struct btrfs_delayed_ref_node *ref2,
		     bool check_seq)
{
	int ret = 0;

	if (ref1->type < ref2->type)
		return -1;
	if (ref1->type > ref2->type)
		return 1;
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
				     btrfs_delayed_node_to_tree_ref(ref2));
	else
		ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
				     btrfs_delayed_node_to_data_ref(ref2));
	if (ret)
		return ret;
	if (check_seq) {
		if (ref1->seq < ref2->seq)
			return -1;
		if (ref1->seq > ref2->seq)
			return 1;
	}
	return 0;
}

/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
						   struct rb_node *node)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_head *entry;
	struct btrfs_delayed_ref_head *ins;
	u64 bytenr;
	bool leftmost = true;

	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
	bytenr = ins->bytenr;
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
				 href_node);

		if (bytenr < entry->bytenr) {
			p = &(*p)->rb_left;
		} else if (bytenr > entry->bytenr) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}

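/*
 * Insert a ref node into a head's ref_tree, ordered by comp_refs() with the
 * seq included. If a node that compares equal (same type, same target, same
 * seq) is already present it is returned instead of inserting, so the caller
 * can merge the ref_mods (see insert_delayed_ref()).
 */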
static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
		struct btrfs_delayed_ref_node *ins)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *node = &ins->ref_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_node *entry;
	bool leftmost = true;

	while (*p) {
		int comp;

		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 ref_node);
		comp = comp_refs(ins, entry, true);
		if (comp < 0) {
			p = &(*p)->rb_left;
		} else if (comp > 0) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}

static struct btrfs_delayed_ref_head *find_first_ref_head(
		struct btrfs_delayed_ref_root *dr)
{
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = rb_first_cached(&dr->href_root);
	if (!n)
		return NULL;

	entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

	return entry;
}

/*
 * Find a head entry based on bytenr. This returns the delayed ref head if it
 * was able to find one, or NULL if nothing was in that spot. If return_bigger
 * is given, the next bigger entry is returned if no exact match is found.
 */
static struct btrfs_delayed_ref_head *find_ref_head(
		struct btrfs_delayed_ref_root *dr, u64 bytenr,
		bool return_bigger)
{
	struct rb_root *root = &dr->href_root.rb_root;
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = root->rb_node;
	entry = NULL;
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

		if (bytenr < entry->bytenr)
			n = n->rb_left;
		else if (bytenr > entry->bytenr)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
		if (bytenr > entry->bytenr) {
			n = rb_next(&entry->href_node);
			if (!n)
				return NULL;
			entry = rb_entry(n, struct btrfs_delayed_ref_head,
					 href_node);
		}
		return entry;
	}
	return NULL;
}

int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);

	if (mutex_trylock(&head->mutex))
		return 0;

	refcount_inc(&head->refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (RB_EMPTY_NODE(&head->href_node)) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref_head(head);
	return 0;
}

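/*
 * Typical use of btrfs_delayed_ref_lock() (illustrative sketch, not code from
 * this file): the caller holds delayed_refs->lock, has picked a head, and
 * wants head->mutex. If we had to sleep for the mutex the head may have been
 * run and removed in the meantime, which is reported as -EAGAIN so the caller
 * can drop it and look up a fresh head, roughly like:
 *
 *	head = btrfs_select_ref_head(delayed_refs);
 *	if (head && btrfs_delayed_ref_lock(delayed_refs, head) == -EAGAIN)
 *		goto retry;
 */
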
static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
				    struct btrfs_delayed_ref_root *delayed_refs,
				    struct btrfs_delayed_ref_head *head,
				    struct btrfs_delayed_ref_node *ref)
{
	lockdep_assert_held(&head->lock);
	rb_erase_cached(&ref->ref_node, &head->ref_tree);
	RB_CLEAR_NODE(&ref->ref_node);
	if (!list_empty(&ref->add_list))
		list_del(&ref->add_list);
	ref->in_tree = 0;
	btrfs_put_delayed_ref(ref);
	atomic_dec(&delayed_refs->num_entries);
}

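/*
 * merge_ref() walks the refs that sort immediately after @ref (same type and
 * target, ignoring seq) and folds their ref_mod into @ref: matching actions
 * add up, opposite actions cancel out. Refs whose seq is still needed by a
 * tree mod log user (seq >= @seq) are left alone, and @ref itself is dropped
 * if the merged ref_mod reaches zero.
 */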
static bool merge_ref(struct btrfs_trans_handle *trans,
		      struct btrfs_delayed_ref_root *delayed_refs,
		      struct btrfs_delayed_ref_head *head,
		      struct btrfs_delayed_ref_node *ref,
		      u64 seq)
{
	struct btrfs_delayed_ref_node *next;
	struct rb_node *node = rb_next(&ref->ref_node);
	bool done = false;

	while (!done && node) {
		int mod;

		next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		node = rb_next(node);
		if (seq && next->seq >= seq)
			break;
		if (comp_refs(ref, next, false))
			break;

		if (ref->action == next->action) {
			mod = next->ref_mod;
		} else {
			if (ref->ref_mod < next->ref_mod) {
				swap(ref, next);
				done = true;
			}
			mod = -next->ref_mod;
		}

		drop_delayed_ref(trans, delayed_refs, head, next);
		ref->ref_mod += mod;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(trans, delayed_refs, head, ref);
			done = true;
		} else {
			/*
			 * Can't have multiples of the same ref on a tree block.
			 */
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}
	}

	return done;
}

void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *delayed_refs,
			      struct btrfs_delayed_ref_head *head)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_node *ref;
	struct rb_node *node;
	u64 seq = 0;

	lockdep_assert_held(&head->lock);

	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
		return;

	/* We don't have too many refs to merge for data. */
	if (head->is_data)
		return;

	seq = btrfs_tree_mod_log_lowest_seq(fs_info);
again:
	for (node = rb_first_cached(&head->ref_tree); node;
	     node = rb_next(node)) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		if (seq && ref->seq >= seq)
			continue;
		if (merge_ref(trans, delayed_refs, head, ref, seq))
			goto again;
	}
}

int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
{
	int ret = 0;
	u64 min_seq = btrfs_tree_mod_log_lowest_seq(fs_info);

	if (min_seq != 0 && seq >= min_seq) {
		btrfs_debug(fs_info,
			    "holding back delayed_ref %llu, lowest is %llu",
			    seq, min_seq);
		ret = 1;
	}

	return ret;
}

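/*
 * btrfs_select_ref_head() picks the next head to run: it starts at
 * run_delayed_start, wraps around to the beginning of the rbtree once if
 * nothing is found, skips heads that another thread already marked as
 * processing, then marks the chosen head processing and advances
 * run_delayed_start past it so subsequent callers spread out over the tree.
 */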
struct btrfs_delayed_ref_head *btrfs_select_ref_head(
		struct btrfs_delayed_ref_root *delayed_refs)
{
	struct btrfs_delayed_ref_head *head;

again:
	head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
			     true);
	if (!head && delayed_refs->run_delayed_start != 0) {
		delayed_refs->run_delayed_start = 0;
		head = find_first_ref_head(delayed_refs);
	}
	if (!head)
		return NULL;

	while (head->processing) {
		struct rb_node *node;

		node = rb_next(&head->href_node);
		if (!node) {
			if (delayed_refs->run_delayed_start == 0)
				return NULL;
			delayed_refs->run_delayed_start = 0;
			goto again;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
	}

	head->processing = 1;
	WARN_ON(delayed_refs->num_heads_ready == 0);
	delayed_refs->num_heads_ready--;
	delayed_refs->run_delayed_start = head->bytenr +
		head->num_bytes;
	return head;
}

void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	lockdep_assert_held(&head->lock);

	rb_erase_cached(&head->href_node, &delayed_refs->href_root);
	RB_CLEAR_NODE(&head->href_node);
	atomic_dec(&delayed_refs->num_entries);
	delayed_refs->num_heads--;
	if (head->processing == 0)
		delayed_refs->num_heads_ready--;
}

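/*
 * Putting the pieces together (greatly simplified sketch, not code from this
 * file; the real loop lives in extent-tree.c and also handles merging,
 * re-selection and error unwinding):
 *
 *	spin_lock(&delayed_refs->lock);
 *	head = btrfs_select_ref_head(delayed_refs);
 *	if (head && !btrfs_delayed_ref_lock(delayed_refs, head)) {
 *		... apply the refs queued in head->ref_tree ...
 *		spin_lock(&head->lock);
 *		btrfs_delete_ref_head(delayed_refs, head);
 *		spin_unlock(&head->lock);
 *		mutex_unlock(&head->mutex);
 *		btrfs_put_delayed_ref_head(head);
 *	}
 *	spin_unlock(&delayed_refs->lock);
 */
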
/*
 * Helper to insert the ref_node to the tail or merge with tail.
 *
 * Return 0 for insert.
 * Return >0 for merge.
 */
static int insert_delayed_ref(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *root,
			      struct btrfs_delayed_ref_head *href,
			      struct btrfs_delayed_ref_node *ref)
{
	struct btrfs_delayed_ref_node *exist;
	int mod;
	int ret = 0;

	spin_lock(&href->lock);
	exist = tree_insert(&href->ref_tree, ref);
	if (!exist)
		goto inserted;

	/* Now we are sure we can merge */
	ret = 1;
	if (exist->action == ref->action) {
		mod = ref->ref_mod;
	} else {
		/* Need to change action */
		if (exist->ref_mod < ref->ref_mod) {
			exist->action = ref->action;
			mod = -exist->ref_mod;
			exist->ref_mod = ref->ref_mod;
			if (ref->action == BTRFS_ADD_DELAYED_REF)
				list_add_tail(&exist->add_list,
					      &href->ref_add_list);
			else if (ref->action == BTRFS_DROP_DELAYED_REF) {
				ASSERT(!list_empty(&exist->add_list));
				list_del(&exist->add_list);
			} else {
				ASSERT(0);
			}
		} else
			mod = -ref->ref_mod;
	}
	exist->ref_mod += mod;

	/* remove existing tail if its ref_mod is zero */
	if (exist->ref_mod == 0)
		drop_delayed_ref(trans, root, href, exist);
	spin_unlock(&href->lock);
	return ret;
inserted:
	if (ref->action == BTRFS_ADD_DELAYED_REF)
		list_add_tail(&ref->add_list, &href->ref_add_list);
	atomic_inc(&root->num_entries);
	spin_unlock(&href->lock);
	return ret;
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_delayed_ref_head *existing,
			 struct btrfs_delayed_ref_head *update)
{
	struct btrfs_delayed_ref_root *delayed_refs =
		&trans->transaction->delayed_refs;
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int old_ref_mod;

	BUG_ON(existing->is_data != update->is_data);

	spin_lock(&existing->lock);
	if (update->must_insert_reserved) {
		/*
		 * if the extent was freed and then reallocated before the
		 * delayed ref entries were processed, we can end up with an
		 * existing head ref without the must_insert_reserved flag set.
		 * Set it again here
		 */
		existing->must_insert_reserved = update->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;
	}

	if (update->extent_op) {
		if (!existing->extent_op) {
			existing->extent_op = update->extent_op;
		} else {
			if (update->extent_op->update_key) {
				memcpy(&existing->extent_op->key,
				       &update->extent_op->key,
				       sizeof(update->extent_op->key));
				existing->extent_op->update_key = true;
			}
			if (update->extent_op->update_flags) {
				existing->extent_op->flags_to_set |=
					update->extent_op->flags_to_set;
				existing->extent_op->update_flags = true;
			}
			btrfs_free_delayed_extent_op(update->extent_op);
		}
	}
	/*
	 * update the reference mod on the head to reflect this new operation,
	 * only need the lock for this case because we could be processing it
	 * currently, for refs we just added we know we're a-ok.
	 */
	old_ref_mod = existing->total_ref_mod;
	existing->ref_mod += update->ref_mod;
	existing->total_ref_mod += update->ref_mod;

	/*
	 * If we are going from a positive ref mod to a negative or vice
	 * versa we need to make sure to adjust pending_csums accordingly.
	 */
	if (existing->is_data) {
		u64 csum_leaves =
			btrfs_csum_bytes_to_leaves(fs_info,
						   existing->num_bytes);

		if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
			delayed_refs->pending_csums -= existing->num_bytes;
			btrfs_delayed_refs_rsv_release(fs_info, csum_leaves);
		}
		if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
			delayed_refs->pending_csums += existing->num_bytes;
			trans->delayed_ref_updates += csum_leaves;
		}
	}

	spin_unlock(&existing->lock);
}

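/*
 * init_delayed_ref_head() fills in a freshly allocated head before it is
 * inserted: ref_mod/total_ref_mod start at +1, -1 or 0 depending on the
 * action, must_insert_reserved is recorded for BTRFS_ADD_DELAYED_EXTENT, and
 * the optional qgroup record is primed with the extent and any reserved data
 * space that should be accounted when the record is processed.
 */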
static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
				  struct btrfs_qgroup_extent_record *qrecord,
				  u64 bytenr, u64 num_bytes, u64 ref_root,
				  u64 reserved, int action, bool is_data,
				  bool is_system)
{
	int count_mod = 1;
	int must_insert_reserved = 0;

	/* If reserved is provided, it must be a data extent. */
	BUG_ON(!is_data && reserved);

	/*
	 * The head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (action == BTRFS_UPDATE_DELAYED_HEAD)
		count_mod = 0;
	else if (action == BTRFS_DROP_DELAYED_REF)
		count_mod = -1;

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
	 * accounting when the extent is finally added, or if a later
	 * modification deletes the delayed ref without ever inserting the
	 * extent into the extent allocation tree.  ref->must_insert_reserved
	 * is the flag used to record that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT)
		must_insert_reserved = 1;
	else
		must_insert_reserved = 0;

	refcount_set(&head_ref->refs, 1);
	head_ref->bytenr = bytenr;
	head_ref->num_bytes = num_bytes;
	head_ref->ref_mod = count_mod;
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;
	head_ref->is_system = is_system;
	head_ref->ref_tree = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&head_ref->ref_add_list);
	RB_CLEAR_NODE(&head_ref->href_node);
	head_ref->processing = 0;
	head_ref->total_ref_mod = count_mod;
	spin_lock_init(&head_ref->lock);
	mutex_init(&head_ref->mutex);

	if (qrecord) {
		if (ref_root && reserved) {
			qrecord->data_rsv = reserved;
			qrecord->data_rsv_refroot = ref_root;
		}
		qrecord->bytenr = bytenr;
		qrecord->num_bytes = num_bytes;
		qrecord->old_roots = NULL;
	}
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_qgroup_extent_record *qrecord,
		     int action, int *qrecord_inserted_ret)
{
	struct btrfs_delayed_ref_head *existing;
	struct btrfs_delayed_ref_root *delayed_refs;
	int qrecord_inserted = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	/* Record qgroup extent info if provided */
	if (qrecord) {
		if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
					delayed_refs, qrecord))
			kfree(qrecord);
		else
			qrecord_inserted = 1;
	}

	trace_add_delayed_ref_head(trans->fs_info, head_ref, action);

	existing = htree_insert(&delayed_refs->href_root,
				&head_ref->href_node);
	if (existing) {
		update_existing_head_ref(trans, existing, head_ref);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
		head_ref = existing;
	} else {
		if (head_ref->is_data && head_ref->ref_mod < 0) {
			delayed_refs->pending_csums += head_ref->num_bytes;
			trans->delayed_ref_updates +=
				btrfs_csum_bytes_to_leaves(trans->fs_info,
							   head_ref->num_bytes);
		}
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		atomic_inc(&delayed_refs->num_entries);
		trans->delayed_ref_updates++;
	}
	if (qrecord_inserted_ret)
		*qrecord_inserted_ret = qrecord_inserted;

	return head_ref;
}

/*
 * init_delayed_ref_common - Initialize the structure which represents a
 *			     modification to an extent.
 *
 * @fs_info:	The filesystem this modification belongs to.
 *
 * @ref:	The structure which is going to be initialized.
 *
 * @bytenr:	The logical address of the extent for which a modification is
 *		going to be recorded.
 *
 * @num_bytes:	Size of the extent whose modification is being recorded.
 *
 * @ref_root:	The id of the root where this modification has originated, this
 *		can be either one of the well-known metadata trees or the
 *		subvolume id which references this extent.
 *
 * @action:	Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
 *		BTRFS_ADD_DELAYED_EXTENT
 *
 * @ref_type:	Holds the type of the extent which is being recorded, can be
 *		one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
 *		when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
 *		BTRFS_EXTENT_DATA_REF_KEY when recording a data extent
 */
static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
				    struct btrfs_delayed_ref_node *ref,
				    u64 bytenr, u64 num_bytes, u64 ref_root,
				    int action, u8 ref_type)
{
	u64 seq = 0;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	if (is_fstree(ref_root))
		seq = atomic64_read(&fs_info->tree_mod_seq);

	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
	ref->type = ref_type;
	RB_CLEAR_NODE(&ref->ref_node);
	INIT_LIST_HEAD(&ref->add_list);
}

/*
 * add a delayed tree ref. This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_ref *generic_ref,
			       struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	bool is_system;
	int action = generic_ref->action;
	int level = generic_ref->tree_ref.level;
	int ret;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->len;
	u64 parent = generic_ref->parent;
	u8 ref_type;

	is_system = (generic_ref->tree_ref.owning_root == BTRFS_CHUNK_TREE_OBJECTID);

	ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    !generic_ref->skip_qgroup) {
		record = kzalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
			return -ENOMEM;
		}
	}

	if (parent)
		ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		ref_type = BTRFS_TREE_BLOCK_REF_KEY;

	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				generic_ref->tree_ref.owning_root, action,
				ref_type);
	ref->root = generic_ref->tree_ref.owning_root;
	ref->parent = parent;
	ref->level = level;

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
			      generic_ref->tree_ref.owning_root, 0, action,
			      false, is_system);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);

	if (qrecord_inserted)
		btrfs_qgroup_trace_extent_post(trans, record);

	return 0;
}

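/*
 * Illustrative caller sketch (not from this file; callers normally build the
 * btrfs_ref with the btrfs_init_generic_ref()/btrfs_init_tree_ref() helpers
 * from delayed-ref.h). Queueing one new reference to a tree block could look
 * roughly like:
 *
 *	struct btrfs_ref ref = { 0 };
 *
 *	ref.type = BTRFS_REF_METADATA;
 *	ref.action = BTRFS_ADD_DELAYED_REF;
 *	ref.bytenr = bytenr;
 *	ref.len = fs_info->nodesize;
 *	ref.parent = parent;			(0 for a keyed backref)
 *	ref.tree_ref.level = level;
 *	ref.tree_ref.owning_root = root_objectid;
 *	ret = btrfs_add_delayed_tree_ref(trans, &ref, NULL);
 */
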
/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_ref *generic_ref,
			       u64 reserved)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	int action = generic_ref->action;
	int ret;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->len;
	u64 parent = generic_ref->parent;
	u64 ref_root = generic_ref->data_ref.owning_root;
	u64 owner = generic_ref->data_ref.ino;
	u64 offset = generic_ref->data_ref.offset;
	u8 ref_type;

	ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref_type = BTRFS_SHARED_DATA_REF_KEY;
	else
		ref_type = BTRFS_EXTENT_DATA_REF_KEY;
	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				ref_root, action, ref_type);
	ref->root = ref_root;
	ref->parent = parent;
	ref->objectid = owner;
	ref->offset = offset;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    !generic_ref->skip_qgroup) {
		record = kzalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep,
					head_ref);
			return -ENOMEM;
		}
	}

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
			      reserved, action, true, false);
	head_ref->extent_op = NULL;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);

	if (qrecord_inserted)
		return btrfs_qgroup_trace_extent_post(trans, record);
	return 0;
}

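/*
 * btrfs_add_delayed_extent_op() below queues only a head level update
 * (BTRFS_UPDATE_DELAYED_HEAD, so the head's ref_mod stays 0): the flags or
 * key recorded in @extent_op get applied to the extent item later without
 * adding or dropping any reference.
 */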
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
			      BTRFS_UPDATE_DELAYED_HEAD, false, false);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
			     NULL);

	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	return 0;
}

/*
 * This does a simple search for the head node for a given extent. Returns the
 * head node if found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
	lockdep_assert_held(&delayed_refs->lock);

	return find_ref_head(delayed_refs, bytenr, false);
}

void __cold btrfs_delayed_ref_exit(void)
{
	kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
	kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int __init btrfs_delayed_ref_init(void)
{
	btrfs_delayed_ref_head_cachep = kmem_cache_create(
				"btrfs_delayed_ref_head",
				sizeof(struct btrfs_delayed_ref_head), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_ref_head_cachep)
		goto fail;

	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
				"btrfs_delayed_tree_ref",
				sizeof(struct btrfs_delayed_tree_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_tree_ref_cachep)
		goto fail;

	btrfs_delayed_data_ref_cachep = kmem_cache_create(
				"btrfs_delayed_data_ref",
				sizeof(struct btrfs_delayed_data_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_data_ref_cachep)
		goto fail;

	btrfs_delayed_extent_op_cachep = kmem_cache_create(
				"btrfs_delayed_extent_op",
				sizeof(struct btrfs_delayed_extent_op), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_extent_op_cachep)
		goto fail;

	return 0;
fail:
	btrfs_delayed_ref_exit();
	return -ENOMEM;
}