extent-io-tree.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/slab.h>
  3. #include <trace/events/btrfs.h>
  4. #include "ctree.h"
  5. #include "extent-io-tree.h"
  6. #include "btrfs_inode.h"
  7. #include "misc.h"
  8. static struct kmem_cache *extent_state_cache;
  9. static inline bool extent_state_in_tree(const struct extent_state *state)
  10. {
  11. return !RB_EMPTY_NODE(&state->rb_node);
  12. }
  13. #ifdef CONFIG_BTRFS_DEBUG
  14. static LIST_HEAD(states);
  15. static DEFINE_SPINLOCK(leak_lock);
  16. static inline void btrfs_leak_debug_add_state(struct extent_state *state)
  17. {
  18. unsigned long flags;
  19. spin_lock_irqsave(&leak_lock, flags);
  20. list_add(&state->leak_list, &states);
  21. spin_unlock_irqrestore(&leak_lock, flags);
  22. }
  23. static inline void btrfs_leak_debug_del_state(struct extent_state *state)
  24. {
  25. unsigned long flags;
  26. spin_lock_irqsave(&leak_lock, flags);
  27. list_del(&state->leak_list);
  28. spin_unlock_irqrestore(&leak_lock, flags);
  29. }
  30. static inline void btrfs_extent_state_leak_debug_check(void)
  31. {
  32. struct extent_state *state;
  33. while (!list_empty(&states)) {
  34. state = list_entry(states.next, struct extent_state, leak_list);
  35. pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
  36. state->start, state->end, state->state,
  37. extent_state_in_tree(state),
  38. refcount_read(&state->refs));
  39. list_del(&state->leak_list);
  40. kmem_cache_free(extent_state_cache, state);
  41. }
  42. }
  43. #define btrfs_debug_check_extent_io_range(tree, start, end) \
  44. __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
  45. static inline void __btrfs_debug_check_extent_io_range(const char *caller,
  46. struct extent_io_tree *tree,
  47. u64 start, u64 end)
  48. {
  49. struct inode *inode = tree->private_data;
  50. u64 isize;
  51. if (!inode)
  52. return;
  53. isize = i_size_read(inode);
  54. if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
  55. btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
  56. "%s: ino %llu isize %llu odd range [%llu,%llu]",
  57. caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
  58. }
  59. }
  60. #else
  61. #define btrfs_leak_debug_add_state(state) do {} while (0)
  62. #define btrfs_leak_debug_del_state(state) do {} while (0)
  63. #define btrfs_extent_state_leak_debug_check() do {} while (0)
  64. #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
  65. #endif
  66. /*
  67. * For the file_extent_tree, we want to hold the inode lock when we lookup and
  68. * update the disk_i_size, but lockdep will complain because for our io_tree we
  69. * hold the tree lock and get the inode lock when setting delalloc. These two things
  70. * are unrelated, so make a class for the file_extent_tree so we don't get the
  71. * two locking patterns mixed up.
  72. */
  73. static struct lock_class_key file_extent_tree_class;
  74. struct tree_entry {
  75. u64 start;
  76. u64 end;
  77. struct rb_node rb_node;
  78. };
  79. void extent_io_tree_init(struct btrfs_fs_info *fs_info,
  80. struct extent_io_tree *tree, unsigned int owner,
  81. void *private_data)
  82. {
  83. tree->fs_info = fs_info;
  84. tree->state = RB_ROOT;
  85. spin_lock_init(&tree->lock);
  86. tree->private_data = private_data;
  87. tree->owner = owner;
  88. if (owner == IO_TREE_INODE_FILE_EXTENT)
  89. lockdep_set_class(&tree->lock, &file_extent_tree_class);
  90. }
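/*
 * A minimal usage sketch (illustrative only; 'fs_info', 'tree' and 'inode'
 * are assumed to be provided by the caller):
 *
 *	extent_io_tree_init(fs_info, tree, IO_TREE_INODE_FILE_EXTENT, inode);
 *	...
 *	extent_io_tree_release(tree);
 */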
  91. void extent_io_tree_release(struct extent_io_tree *tree)
  92. {
  93. spin_lock(&tree->lock);
  94. /*
  95. * Do a single barrier for the waitqueue_active check here, the state
  96. * of the waitqueue should not change once extent_io_tree_release is
  97. * called.
  98. */
  99. smp_mb();
  100. while (!RB_EMPTY_ROOT(&tree->state)) {
  101. struct rb_node *node;
  102. struct extent_state *state;
  103. node = rb_first(&tree->state);
  104. state = rb_entry(node, struct extent_state, rb_node);
  105. rb_erase(&state->rb_node, &tree->state);
  106. RB_CLEAR_NODE(&state->rb_node);
  107. /*
  108. * btree io trees aren't supposed to have tasks waiting for
  109. * changes in the flags of extent states ever.
  110. */
  111. ASSERT(!waitqueue_active(&state->wq));
  112. free_extent_state(state);
  113. cond_resched_lock(&tree->lock);
  114. }
  115. spin_unlock(&tree->lock);
  116. }
  117. static struct extent_state *alloc_extent_state(gfp_t mask)
  118. {
  119. struct extent_state *state;
  120. /*
  121. * The given mask might not be appropriate for the slab allocator,
  122. * so drop the unsupported bits.
  123. */
  124. mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
  125. state = kmem_cache_alloc(extent_state_cache, mask);
  126. if (!state)
  127. return state;
  128. state->state = 0;
  129. RB_CLEAR_NODE(&state->rb_node);
  130. btrfs_leak_debug_add_state(state);
  131. refcount_set(&state->refs, 1);
  132. init_waitqueue_head(&state->wq);
  133. trace_alloc_extent_state(state, mask, _RET_IP_);
  134. return state;
  135. }
  136. static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
  137. {
  138. if (!prealloc)
  139. prealloc = alloc_extent_state(GFP_ATOMIC);
  140. return prealloc;
  141. }
  142. void free_extent_state(struct extent_state *state)
  143. {
  144. if (!state)
  145. return;
  146. if (refcount_dec_and_test(&state->refs)) {
  147. WARN_ON(extent_state_in_tree(state));
  148. btrfs_leak_debug_del_state(state);
  149. trace_free_extent_state(state, _RET_IP_);
  150. kmem_cache_free(extent_state_cache, state);
  151. }
  152. }
  153. static int add_extent_changeset(struct extent_state *state, u32 bits,
  154. struct extent_changeset *changeset,
  155. int set)
  156. {
  157. int ret;
  158. if (!changeset)
  159. return 0;
  160. if (set && (state->state & bits) == bits)
  161. return 0;
  162. if (!set && (state->state & bits) == 0)
  163. return 0;
  164. changeset->bytes_changed += state->end - state->start + 1;
  165. ret = ulist_add(&changeset->range_changed, state->start, state->end,
  166. GFP_ATOMIC);
  167. return ret;
  168. }
  169. static inline struct extent_state *next_state(struct extent_state *state)
  170. {
  171. struct rb_node *next = rb_next(&state->rb_node);
  172. if (next)
  173. return rb_entry(next, struct extent_state, rb_node);
  174. else
  175. return NULL;
  176. }
  177. static inline struct extent_state *prev_state(struct extent_state *state)
  178. {
  179. struct rb_node *next = rb_prev(&state->rb_node);
  180. if (next)
  181. return rb_entry(next, struct extent_state, rb_node);
  182. else
  183. return NULL;
  184. }
  185. /*
  186. * Search @tree for an entry that contains @offset. Such entry would have
  187. * entry->start <= offset && entry->end >= offset.
  188. *
  189. * @tree: the tree to search
  190. * @offset: offset that should fall within an entry in @tree
  191. * @node_ret: pointer where new node should be anchored (used when inserting an
  192. * entry in the tree)
  193. * @parent_ret: points to entry which would have been the parent of the entry,
  194. * containing @offset
  195. *
  196. * Return a pointer to the entry that contains @offset byte address and don't change
  197. * @node_ret and @parent_ret.
  198. *
  199. * If no such entry exists, fill @node_ret and @parent_ret and return a pointer to
  200. * the first entry that ends at or after @offset, or NULL if @offset is past the last entry.
  201. */
  202. static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
  203. u64 offset,
  204. struct rb_node ***node_ret,
  205. struct rb_node **parent_ret)
  206. {
  207. struct rb_root *root = &tree->state;
  208. struct rb_node **node = &root->rb_node;
  209. struct rb_node *prev = NULL;
  210. struct extent_state *entry = NULL;
  211. while (*node) {
  212. prev = *node;
  213. entry = rb_entry(prev, struct extent_state, rb_node);
  214. if (offset < entry->start)
  215. node = &(*node)->rb_left;
  216. else if (offset > entry->end)
  217. node = &(*node)->rb_right;
  218. else
  219. return entry;
  220. }
  221. if (node_ret)
  222. *node_ret = node;
  223. if (parent_ret)
  224. *parent_ret = prev;
  225. /* Search neighbors until we find the first one past the end */
  226. while (entry && offset > entry->end)
  227. entry = next_state(entry);
  228. return entry;
  229. }
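/*
 * For illustration (hypothetical tree contents): with entries [0, 4095] and
 * [8192, 12287], an @offset of 100 returns the [0, 4095] entry without
 * touching @node_ret/@parent_ret, while an @offset of 6000 fills
 * @node_ret/@parent_ret and returns the [8192, 12287] entry.
 */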
  230. /*
  231. * Search offset in the tree or fill neighbor rbtree node pointers.
  232. *
  233. * @tree: the tree to search
  234. * @offset: offset that should fall within an entry in @tree
  235. * @next_ret: pointer to the first entry whose range ends after @offset
  236. * @prev_ret: pointer to the first entry whose range begins before @offset
  237. *
  238. * Return a pointer to the entry that contains @offset byte address. If no
  239. * such entry exists, then return NULL and fill @prev_ret and @next_ret.
  240. * Otherwise return the found entry and other pointers are left untouched.
  241. */
  242. static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
  243. u64 offset,
  244. struct extent_state **prev_ret,
  245. struct extent_state **next_ret)
  246. {
  247. struct rb_root *root = &tree->state;
  248. struct rb_node **node = &root->rb_node;
  249. struct extent_state *orig_prev;
  250. struct extent_state *entry = NULL;
  251. ASSERT(prev_ret);
  252. ASSERT(next_ret);
  253. while (*node) {
  254. entry = rb_entry(*node, struct extent_state, rb_node);
  255. if (offset < entry->start)
  256. node = &(*node)->rb_left;
  257. else if (offset > entry->end)
  258. node = &(*node)->rb_right;
  259. else
  260. return entry;
  261. }
  262. orig_prev = entry;
  263. while (entry && offset > entry->end)
  264. entry = next_state(entry);
  265. *next_ret = entry;
  266. entry = orig_prev;
  267. while (entry && offset < entry->start)
  268. entry = prev_state(entry);
  269. *prev_ret = entry;
  270. return NULL;
  271. }
  272. /*
  273. * Inexact rb-tree search, return the next entry if @offset is not found
  274. */
  275. static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
  276. {
  277. return tree_search_for_insert(tree, offset, NULL, NULL);
  278. }
  279. static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
  280. {
  281. btrfs_panic(tree->fs_info, err,
  282. "locking error: extent tree was modified by another thread while locked");
  283. }
  284. /*
  285. * Utility function to look for merge candidates inside a given range. Any
  286. * extents with matching state are merged together into a single extent in the
  287. * tree. Extents with EXTENT_LOCKED or EXTENT_BOUNDARY in their state field
  288. * are not merged because the end_io handlers need to be able to do operations
  289. * on them without sleeping (or doing allocations/splits).
  290. *
  291. * This should be called with the tree lock held.
  292. */
  293. static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
  294. {
  295. struct extent_state *other;
  296. if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
  297. return;
  298. other = prev_state(state);
  299. if (other && other->end == state->start - 1 &&
  300. other->state == state->state) {
  301. if (tree->private_data)
  302. btrfs_merge_delalloc_extent(tree->private_data,
  303. state, other);
  304. state->start = other->start;
  305. rb_erase(&other->rb_node, &tree->state);
  306. RB_CLEAR_NODE(&other->rb_node);
  307. free_extent_state(other);
  308. }
  309. other = next_state(state);
  310. if (other && other->start == state->end + 1 &&
  311. other->state == state->state) {
  312. if (tree->private_data)
  313. btrfs_merge_delalloc_extent(tree->private_data, state,
  314. other);
  315. state->end = other->end;
  316. rb_erase(&other->rb_node, &tree->state);
  317. RB_CLEAR_NODE(&other->rb_node);
  318. free_extent_state(other);
  319. }
  320. }
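/*
 * For illustration: two adjacent states [0, 4095] and [4096, 8191] carrying an
 * identical set of bits are collapsed into a single [0, 8191] state; a gap
 * between them, differing bits, or EXTENT_LOCKED/EXTENT_BOUNDARY prevents the
 * merge.
 */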
  321. static void set_state_bits(struct extent_io_tree *tree,
  322. struct extent_state *state,
  323. u32 bits, struct extent_changeset *changeset)
  324. {
  325. u32 bits_to_set = bits & ~EXTENT_CTLBITS;
  326. int ret;
  327. if (tree->private_data)
  328. btrfs_set_delalloc_extent(tree->private_data, state, bits);
  329. ret = add_extent_changeset(state, bits_to_set, changeset, 1);
  330. BUG_ON(ret < 0);
  331. state->state |= bits_to_set;
  332. }
  333. /*
  334. * Insert an extent_state struct into the tree. 'bits' are set on the
  335. * struct before it is inserted.
  336. *
  337. * This may return -EEXIST if the extent is already there, in which case the
  338. * state struct is freed.
  339. *
  340. * The tree lock is not taken internally. This is a utility function and
  341. * probably isn't what you want to call (see set/clear_extent_bit).
  342. */
  343. static int insert_state(struct extent_io_tree *tree,
  344. struct extent_state *state,
  345. u32 bits, struct extent_changeset *changeset)
  346. {
  347. struct rb_node **node;
  348. struct rb_node *parent = NULL;
  349. const u64 end = state->end;
  350. set_state_bits(tree, state, bits, changeset);
  351. node = &tree->state.rb_node;
  352. while (*node) {
  353. struct extent_state *entry;
  354. parent = *node;
  355. entry = rb_entry(parent, struct extent_state, rb_node);
  356. if (end < entry->start) {
  357. node = &(*node)->rb_left;
  358. } else if (end > entry->end) {
  359. node = &(*node)->rb_right;
  360. } else {
  361. btrfs_err(tree->fs_info,
  362. "found node %llu %llu on insert of %llu %llu",
  363. entry->start, entry->end, state->start, end);
  364. return -EEXIST;
  365. }
  366. }
  367. rb_link_node(&state->rb_node, parent, node);
  368. rb_insert_color(&state->rb_node, &tree->state);
  369. merge_state(tree, state);
  370. return 0;
  371. }
  372. /*
  373. * Insert state to @tree to the location given by @node and @parent.
  374. */
  375. static void insert_state_fast(struct extent_io_tree *tree,
  376. struct extent_state *state, struct rb_node **node,
  377. struct rb_node *parent, unsigned bits,
  378. struct extent_changeset *changeset)
  379. {
  380. set_state_bits(tree, state, bits, changeset);
  381. rb_link_node(&state->rb_node, parent, node);
  382. rb_insert_color(&state->rb_node, &tree->state);
  383. merge_state(tree, state);
  384. }
  385. /*
  386. * Split a given extent state struct in two, inserting the preallocated
  387. * struct 'prealloc' as the newly created second half. 'split' indicates an
  388. * offset inside 'orig' where it should be split.
  389. *
  390. * Before calling,
  391. * the tree has 'orig' at [orig->start, orig->end]. After calling, there
  392. * are two extent state structs in the tree:
  393. * prealloc: [orig->start, split - 1]
  394. * orig: [ split, orig->end ]
  395. *
  396. * The tree locks are not taken by this function. They need to be held
  397. * by the caller.
  398. */
  399. static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
  400. struct extent_state *prealloc, u64 split)
  401. {
  402. struct rb_node *parent = NULL;
  403. struct rb_node **node;
  404. if (tree->private_data)
  405. btrfs_split_delalloc_extent(tree->private_data, orig, split);
  406. prealloc->start = orig->start;
  407. prealloc->end = split - 1;
  408. prealloc->state = orig->state;
  409. orig->start = split;
  410. parent = &orig->rb_node;
  411. node = &parent;
  412. while (*node) {
  413. struct extent_state *entry;
  414. parent = *node;
  415. entry = rb_entry(parent, struct extent_state, rb_node);
  416. if (prealloc->end < entry->start) {
  417. node = &(*node)->rb_left;
  418. } else if (prealloc->end > entry->end) {
  419. node = &(*node)->rb_right;
  420. } else {
  421. free_extent_state(prealloc);
  422. return -EEXIST;
  423. }
  424. }
  425. rb_link_node(&prealloc->rb_node, parent, node);
  426. rb_insert_color(&prealloc->rb_node, &tree->state);
  427. return 0;
  428. }
  429. /*
  430. * Utility function to clear some bits in an extent state struct. It will
  431. * optionally wake up anyone waiting on this state (wake == 1).
  432. *
  433. * If no bits are set on the state struct after clearing things, the
  434. * struct is freed and removed from the tree
  435. */
  436. static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
  437. struct extent_state *state,
  438. u32 bits, int wake,
  439. struct extent_changeset *changeset)
  440. {
  441. struct extent_state *next;
  442. u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
  443. int ret;
  444. if (tree->private_data)
  445. btrfs_clear_delalloc_extent(tree->private_data, state, bits);
  446. ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
  447. BUG_ON(ret < 0);
  448. state->state &= ~bits_to_clear;
  449. if (wake)
  450. wake_up(&state->wq);
  451. if (state->state == 0) {
  452. next = next_state(state);
  453. if (extent_state_in_tree(state)) {
  454. rb_erase(&state->rb_node, &tree->state);
  455. RB_CLEAR_NODE(&state->rb_node);
  456. free_extent_state(state);
  457. } else {
  458. WARN_ON(1);
  459. }
  460. } else {
  461. merge_state(tree, state);
  462. next = next_state(state);
  463. }
  464. return next;
  465. }
  466. /*
  467. * Clear some bits on a range in the tree. This may require splitting or
  468. * inserting elements in the tree, so the gfp mask is used to indicate which
  469. * allocations or sleeping are allowed.
  470. *
  471. * Clearing EXTENT_LOCKED wakes any sleepers, and passing EXTENT_CLEAR_ALL_BITS
  472. * removes the given range from the tree regardless of state (ie for truncate).
  473. *
  474. * The range [start, end] is inclusive.
  475. *
  476. * This takes the tree lock, and returns 0 on success and < 0 on error.
  477. */
  478. int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
  479. u32 bits, struct extent_state **cached_state,
  480. gfp_t mask, struct extent_changeset *changeset)
  481. {
  482. struct extent_state *state;
  483. struct extent_state *cached;
  484. struct extent_state *prealloc = NULL;
  485. u64 last_end;
  486. int err;
  487. int clear = 0;
  488. int wake;
  489. int delete = (bits & EXTENT_CLEAR_ALL_BITS);
  490. btrfs_debug_check_extent_io_range(tree, start, end);
  491. trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
  492. if (delete)
  493. bits |= ~EXTENT_CTLBITS;
  494. if (bits & EXTENT_DELALLOC)
  495. bits |= EXTENT_NORESERVE;
  496. wake = (bits & EXTENT_LOCKED) ? 1 : 0;
  497. if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
  498. clear = 1;
  499. again:
  500. if (!prealloc) {
  501. /*
  502. * Don't care for allocation failure here because we might end
  503. * up not needing the pre-allocated extent state at all, which
  504. * is the case if we only have in the tree extent states that
  505. * cover our input range and don't cover any other range.
  506. * If we end up needing a new extent state we allocate it later.
  507. */
  508. prealloc = alloc_extent_state(mask);
  509. }
  510. spin_lock(&tree->lock);
  511. if (cached_state) {
  512. cached = *cached_state;
  513. if (clear) {
  514. *cached_state = NULL;
  515. cached_state = NULL;
  516. }
  517. if (cached && extent_state_in_tree(cached) &&
  518. cached->start <= start && cached->end > start) {
  519. if (clear)
  520. refcount_dec(&cached->refs);
  521. state = cached;
  522. goto hit_next;
  523. }
  524. if (clear)
  525. free_extent_state(cached);
  526. }
  527. /* This search will find the extents that end after our range starts. */
  528. state = tree_search(tree, start);
  529. if (!state)
  530. goto out;
  531. hit_next:
  532. if (state->start > end)
  533. goto out;
  534. WARN_ON(state->end < start);
  535. last_end = state->end;
  536. /* The state doesn't have the wanted bits, go ahead. */
  537. if (!(state->state & bits)) {
  538. state = next_state(state);
  539. goto next;
  540. }
  541. /*
  542. * | ---- desired range ---- |
  543. * | state | or
  544. * | ------------- state -------------- |
  545. *
  546. * We need to split the extent we found, and may flip bits on second
  547. * half.
  548. *
  549. * If the extent we found extends past our range, we just split and
  550. * search again. It'll get split again the next time though.
  551. *
  552. * If the extent we found is inside our range, we clear the desired bit
  553. * on it.
  554. */
  555. if (state->start < start) {
  556. prealloc = alloc_extent_state_atomic(prealloc);
  557. if (!prealloc)
  558. goto search_again;
  559. err = split_state(tree, state, prealloc, start);
  560. if (err)
  561. extent_io_tree_panic(tree, err);
  562. prealloc = NULL;
  563. if (err)
  564. goto out;
  565. if (state->end <= end) {
  566. state = clear_state_bit(tree, state, bits, wake, changeset);
  567. goto next;
  568. }
  569. goto search_again;
  570. }
  571. /*
  572. * | ---- desired range ---- |
  573. * | state |
  574. * We need to split the extent, and clear the bit on the first half.
  575. */
  576. if (state->start <= end && state->end > end) {
  577. prealloc = alloc_extent_state_atomic(prealloc);
  578. if (!prealloc)
  579. goto search_again;
  580. err = split_state(tree, state, prealloc, end + 1);
  581. if (err)
  582. extent_io_tree_panic(tree, err);
  583. if (wake)
  584. wake_up(&state->wq);
  585. clear_state_bit(tree, prealloc, bits, wake, changeset);
  586. prealloc = NULL;
  587. goto out;
  588. }
  589. state = clear_state_bit(tree, state, bits, wake, changeset);
  590. next:
  591. if (last_end == (u64)-1)
  592. goto out;
  593. start = last_end + 1;
  594. if (start <= end && state && !need_resched())
  595. goto hit_next;
  596. search_again:
  597. if (start > end)
  598. goto out;
  599. spin_unlock(&tree->lock);
  600. if (gfpflags_allow_blocking(mask))
  601. cond_resched();
  602. goto again;
  603. out:
  604. spin_unlock(&tree->lock);
  605. if (prealloc)
  606. free_extent_state(prealloc);
  607. return 0;
  608. }
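/*
 * Illustrative calls only (the surrounding locals are assumptions of this
 * sketch): clearing one bit versus dropping the whole range regardless of
 * state:
 *
 *	__clear_extent_bit(tree, start, end, EXTENT_DELALLOC, NULL, GFP_NOFS, NULL);
 *	__clear_extent_bit(tree, start, end, EXTENT_CLEAR_ALL_BITS, NULL, GFP_NOFS, NULL);
 */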
  609. static void wait_on_state(struct extent_io_tree *tree,
  610. struct extent_state *state)
  611. __releases(tree->lock)
  612. __acquires(tree->lock)
  613. {
  614. DEFINE_WAIT(wait);
  615. prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
  616. spin_unlock(&tree->lock);
  617. schedule();
  618. spin_lock(&tree->lock);
  619. finish_wait(&state->wq, &wait);
  620. }
  621. /*
  622. * Wait for one or more bits to clear on a range in the state tree.
  623. * The range [start, end] is inclusive.
  624. * The tree lock is taken by this function.
  625. */
  626. void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits)
  627. {
  628. struct extent_state *state;
  629. btrfs_debug_check_extent_io_range(tree, start, end);
  630. spin_lock(&tree->lock);
  631. again:
  632. while (1) {
  633. /*
  634. * This search will find all the extents that end after our
  635. * range starts.
  636. */
  637. state = tree_search(tree, start);
  638. process_node:
  639. if (!state)
  640. break;
  641. if (state->start > end)
  642. goto out;
  643. if (state->state & bits) {
  644. start = state->start;
  645. refcount_inc(&state->refs);
  646. wait_on_state(tree, state);
  647. free_extent_state(state);
  648. goto again;
  649. }
  650. start = state->end + 1;
  651. if (start > end)
  652. break;
  653. if (!cond_resched_lock(&tree->lock)) {
  654. state = next_state(state);
  655. goto process_node;
  656. }
  657. }
  658. out:
  659. spin_unlock(&tree->lock);
  660. }
  661. static void cache_state_if_flags(struct extent_state *state,
  662. struct extent_state **cached_ptr,
  663. unsigned flags)
  664. {
  665. if (cached_ptr && !(*cached_ptr)) {
  666. if (!flags || (state->state & flags)) {
  667. *cached_ptr = state;
  668. refcount_inc(&state->refs);
  669. }
  670. }
  671. }
  672. static void cache_state(struct extent_state *state,
  673. struct extent_state **cached_ptr)
  674. {
  675. return cache_state_if_flags(state, cached_ptr,
  676. EXTENT_LOCKED | EXTENT_BOUNDARY);
  677. }
  678. /*
  679. * Find the first state struct with 'bits' set after 'start', and return it.
  680. * tree->lock must be held. NULL will be returned if nothing was found after
  681. * 'start'.
  682. */
  683. static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
  684. u64 start, u32 bits)
  685. {
  686. struct extent_state *state;
  687. /*
  688. * This search will find all the extents that end after our range
  689. * starts.
  690. */
  691. state = tree_search(tree, start);
  692. while (state) {
  693. if (state->end >= start && (state->state & bits))
  694. return state;
  695. state = next_state(state);
  696. }
  697. return NULL;
  698. }
  699. /*
  700. * Find the first offset in the io tree with one or more @bits set.
  701. *
  702. * Note: If there are multiple bits set in @bits, any of them will match.
  703. *
  704. * Return 0 if we find something, and update @start_ret and @end_ret.
  705. * Return 1 if we found nothing.
  706. */
  707. int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
  708. u64 *start_ret, u64 *end_ret, u32 bits,
  709. struct extent_state **cached_state)
  710. {
  711. struct extent_state *state;
  712. int ret = 1;
  713. spin_lock(&tree->lock);
  714. if (cached_state && *cached_state) {
  715. state = *cached_state;
  716. if (state->end == start - 1 && extent_state_in_tree(state)) {
  717. while ((state = next_state(state)) != NULL) {
  718. if (state->state & bits)
  719. goto got_it;
  720. }
  721. free_extent_state(*cached_state);
  722. *cached_state = NULL;
  723. goto out;
  724. }
  725. free_extent_state(*cached_state);
  726. *cached_state = NULL;
  727. }
  728. state = find_first_extent_bit_state(tree, start, bits);
  729. got_it:
  730. if (state) {
  731. cache_state_if_flags(state, cached_state, 0);
  732. *start_ret = state->start;
  733. *end_ret = state->end;
  734. ret = 0;
  735. }
  736. out:
  737. spin_unlock(&tree->lock);
  738. return ret;
  739. }
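/*
 * A sketch of the usual iteration pattern (variable names are illustrative):
 *
 *	u64 found_start, found_end;
 *	u64 cur = 0;
 *
 *	while (!find_first_extent_bit(tree, cur, &found_start, &found_end,
 *				      EXTENT_DIRTY, NULL)) {
 *		(process [found_start, found_end] here)
 *		cur = found_end + 1;
 *	}
 */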
  740. /*
  741. * Find a contiguous area of bits
  742. *
  743. * @tree: io tree to check
  744. * @start: offset to start the search from
  745. * @start_ret: the first offset we found with the bits set
  746. * @end_ret: the final contiguous range of the bits that were set
  747. * @bits: bits to look for
  748. *
  749. * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
  750. * to set bits appropriately, and then merge them again. During this time it
  751. * will drop the tree->lock, so use this helper if you want to find the actual
  752. * contiguous area for given bits. We will search to the first bit we find, and
  753. * then walk down the tree until we find a non-contiguous area. The area
  754. * returned will be the full contiguous area with the bits set.
  755. */
  756. int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
  757. u64 *start_ret, u64 *end_ret, u32 bits)
  758. {
  759. struct extent_state *state;
  760. int ret = 1;
  761. spin_lock(&tree->lock);
  762. state = find_first_extent_bit_state(tree, start, bits);
  763. if (state) {
  764. *start_ret = state->start;
  765. *end_ret = state->end;
  766. while ((state = next_state(state)) != NULL) {
  767. if (state->start > (*end_ret + 1))
  768. break;
  769. *end_ret = state->end;
  770. }
  771. ret = 0;
  772. }
  773. spin_unlock(&tree->lock);
  774. return ret;
  775. }
  776. /*
  777. * Find a contiguous range of bytes in the file marked as delalloc, not more
  778. * than 'max_bytes'. start and end are used to return the range.
  779. *
  780. * True is returned if we find something, false if nothing was in the tree.
  781. */
  782. bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
  783. u64 *end, u64 max_bytes,
  784. struct extent_state **cached_state)
  785. {
  786. struct extent_state *state;
  787. u64 cur_start = *start;
  788. bool found = false;
  789. u64 total_bytes = 0;
  790. spin_lock(&tree->lock);
  791. /*
  792. * This search will find all the extents that end after our range
  793. * starts.
  794. */
  795. state = tree_search(tree, cur_start);
  796. if (!state) {
  797. *end = (u64)-1;
  798. goto out;
  799. }
  800. while (state) {
  801. if (found && (state->start != cur_start ||
  802. (state->state & EXTENT_BOUNDARY))) {
  803. goto out;
  804. }
  805. if (!(state->state & EXTENT_DELALLOC)) {
  806. if (!found)
  807. *end = state->end;
  808. goto out;
  809. }
  810. if (!found) {
  811. *start = state->start;
  812. *cached_state = state;
  813. refcount_inc(&state->refs);
  814. }
  815. found = true;
  816. *end = state->end;
  817. cur_start = state->end + 1;
  818. total_bytes += state->end - state->start + 1;
  819. if (total_bytes >= max_bytes)
  820. break;
  821. state = next_state(state);
  822. }
  823. out:
  824. spin_unlock(&tree->lock);
  825. return found;
  826. }
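/*
 * For illustration: with EXTENT_DELALLOC set on [4096, 8191] and
 * [8192, 16383] and *start == 4096, this returns true with *start == 4096 and
 * *end == 16383 (the walk stops early once 'max_bytes' bytes have been
 * accumulated or an EXTENT_BOUNDARY state is hit).
 */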
  827. /*
  828. * Set some bits on a range in the tree. This may require allocations or
  829. * sleeping, so the gfp mask is used to indicate what is allowed.
  830. *
  831. * If any of the exclusive bits are set, this will fail with -EEXIST if some
  832. * part of the range already has the desired bits set. The start of the
  833. * existing range is returned in failed_start in this case.
  834. *
  835. * [start, end] is inclusive. This takes the tree lock.
  836. */
  837. static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
  838. u32 bits, u64 *failed_start,
  839. struct extent_state **cached_state,
  840. struct extent_changeset *changeset, gfp_t mask)
  841. {
  842. struct extent_state *state;
  843. struct extent_state *prealloc = NULL;
  844. struct rb_node **p;
  845. struct rb_node *parent;
  846. int err = 0;
  847. u64 last_start;
  848. u64 last_end;
  849. u32 exclusive_bits = (bits & EXTENT_LOCKED);
  850. btrfs_debug_check_extent_io_range(tree, start, end);
  851. trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
  852. if (exclusive_bits)
  853. ASSERT(failed_start);
  854. else
  855. ASSERT(failed_start == NULL);
  856. again:
  857. if (!prealloc) {
  858. /*
  859. * Don't care for allocation failure here because we might end
  860. * up not needing the pre-allocated extent state at all, which
  861. * is the case if we only have in the tree extent states that
  862. * cover our input range and don't cover any other range.
  863. * If we end up needing a new extent state we allocate it later.
  864. */
  865. prealloc = alloc_extent_state(mask);
  866. }
  867. spin_lock(&tree->lock);
  868. if (cached_state && *cached_state) {
  869. state = *cached_state;
  870. if (state->start <= start && state->end > start &&
  871. extent_state_in_tree(state))
  872. goto hit_next;
  873. }
  874. /*
  875. * This search will find all the extents that end after our range
  876. * starts.
  877. */
  878. state = tree_search_for_insert(tree, start, &p, &parent);
  879. if (!state) {
  880. prealloc = alloc_extent_state_atomic(prealloc);
  881. if (!prealloc)
  882. goto search_again;
  883. prealloc->start = start;
  884. prealloc->end = end;
  885. insert_state_fast(tree, prealloc, p, parent, bits, changeset);
  886. cache_state(prealloc, cached_state);
  887. prealloc = NULL;
  888. goto out;
  889. }
  890. hit_next:
  891. last_start = state->start;
  892. last_end = state->end;
  893. /*
  894. * | ---- desired range ---- |
  895. * | state |
  896. *
  897. * Just lock what we found and keep going
  898. */
  899. if (state->start == start && state->end <= end) {
  900. if (state->state & exclusive_bits) {
  901. *failed_start = state->start;
  902. err = -EEXIST;
  903. goto out;
  904. }
  905. set_state_bits(tree, state, bits, changeset);
  906. cache_state(state, cached_state);
  907. merge_state(tree, state);
  908. if (last_end == (u64)-1)
  909. goto out;
  910. start = last_end + 1;
  911. state = next_state(state);
  912. if (start < end && state && state->start == start &&
  913. !need_resched())
  914. goto hit_next;
  915. goto search_again;
  916. }
  917. /*
  918. * | ---- desired range ---- |
  919. * | state |
  920. * or
  921. * | ------------- state -------------- |
  922. *
  923. * We need to split the extent we found, and may flip bits on second
  924. * half.
  925. *
  926. * If the extent we found extends past our range, we just split and
  927. * search again. It'll get split again the next time though.
  928. *
  929. * If the extent we found is inside our range, we set the desired bit
  930. * on it.
  931. */
  932. if (state->start < start) {
  933. if (state->state & exclusive_bits) {
  934. *failed_start = start;
  935. err = -EEXIST;
  936. goto out;
  937. }
  938. /*
  939. * If this extent already has all the bits we want set, then
  940. * skip it, not necessary to split it or do anything with it.
  941. */
  942. if ((state->state & bits) == bits) {
  943. start = state->end + 1;
  944. cache_state(state, cached_state);
  945. goto search_again;
  946. }
  947. prealloc = alloc_extent_state_atomic(prealloc);
  948. if (!prealloc)
  949. goto search_again;
  950. err = split_state(tree, state, prealloc, start);
  951. if (err)
  952. extent_io_tree_panic(tree, err);
  953. prealloc = NULL;
  954. if (err)
  955. goto out;
  956. if (state->end <= end) {
  957. set_state_bits(tree, state, bits, changeset);
  958. cache_state(state, cached_state);
  959. merge_state(tree, state);
  960. if (last_end == (u64)-1)
  961. goto out;
  962. start = last_end + 1;
  963. state = next_state(state);
  964. if (start < end && state && state->start == start &&
  965. !need_resched())
  966. goto hit_next;
  967. }
  968. goto search_again;
  969. }
  970. /*
  971. * | ---- desired range ---- |
  972. * | state | or | state |
  973. *
  974. * There's a hole, we need to insert something in it and ignore the
  975. * extent we found.
  976. */
  977. if (state->start > start) {
  978. u64 this_end;
  979. if (end < last_start)
  980. this_end = end;
  981. else
  982. this_end = last_start - 1;
  983. prealloc = alloc_extent_state_atomic(prealloc);
  984. if (!prealloc)
  985. goto search_again;
  986. /*
  987. * Avoid to free 'prealloc' if it can be merged with the later
  988. * extent.
  989. */
  990. prealloc->start = start;
  991. prealloc->end = this_end;
  992. err = insert_state(tree, prealloc, bits, changeset);
  993. if (err)
  994. extent_io_tree_panic(tree, err);
  995. cache_state(prealloc, cached_state);
  996. prealloc = NULL;
  997. start = this_end + 1;
  998. goto search_again;
  999. }
  1000. /*
  1001. * | ---- desired range ---- |
  1002. * | state |
  1003. *
  1004. * We need to split the extent, and set the bit on the first half
  1005. */
  1006. if (state->start <= end && state->end > end) {
  1007. if (state->state & exclusive_bits) {
  1008. *failed_start = start;
  1009. err = -EEXIST;
  1010. goto out;
  1011. }
  1012. prealloc = alloc_extent_state_atomic(prealloc);
  1013. if (!prealloc)
  1014. goto search_again;
  1015. err = split_state(tree, state, prealloc, end + 1);
  1016. if (err)
  1017. extent_io_tree_panic(tree, err);
  1018. set_state_bits(tree, prealloc, bits, changeset);
  1019. cache_state(prealloc, cached_state);
  1020. merge_state(tree, prealloc);
  1021. prealloc = NULL;
  1022. goto out;
  1023. }
  1024. search_again:
  1025. if (start > end)
  1026. goto out;
  1027. spin_unlock(&tree->lock);
  1028. if (gfpflags_allow_blocking(mask))
  1029. cond_resched();
  1030. goto again;
  1031. out:
  1032. spin_unlock(&tree->lock);
  1033. if (prealloc)
  1034. free_extent_state(prealloc);
  1035. return err;
  1036. }
  1037. int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
  1038. u32 bits, struct extent_state **cached_state, gfp_t mask)
  1039. {
  1040. return __set_extent_bit(tree, start, end, bits, NULL, cached_state,
  1041. NULL, mask);
  1042. }
  1043. /*
  1044. * Convert all bits in a given range from one bit to another
  1045. *
  1046. * @tree: the io tree to search
  1047. * @start: the start offset in bytes
  1048. * @end: the end offset in bytes (inclusive)
  1049. * @bits: the bits to set in this range
  1050. * @clear_bits: the bits to clear in this range
  1051. * @cached_state: state that we're going to cache
  1052. *
  1053. * This will go through and set bits for the given range. If any states exist
  1054. * already in this range they are set with the given bit and cleared of the
  1055. * clear_bits. This is only meant to be used by things that are mergeable, ie.
  1056. * converting from say DELALLOC to DIRTY. This is not meant to be used with
  1057. * boundary bits like LOCK.
  1058. *
  1059. * All allocations are done with GFP_NOFS.
  1060. */
  1061. int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
  1062. u32 bits, u32 clear_bits,
  1063. struct extent_state **cached_state)
  1064. {
  1065. struct extent_state *state;
  1066. struct extent_state *prealloc = NULL;
  1067. struct rb_node **p;
  1068. struct rb_node *parent;
  1069. int err = 0;
  1070. u64 last_start;
  1071. u64 last_end;
  1072. bool first_iteration = true;
  1073. btrfs_debug_check_extent_io_range(tree, start, end);
  1074. trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
  1075. clear_bits);
  1076. again:
  1077. if (!prealloc) {
  1078. /*
  1079. * Best effort, don't worry if extent state allocation fails
  1080. * here for the first iteration. We might have a cached state
  1081. * that matches exactly the target range, in which case no
  1082. * extent state allocations are needed. We'll only know this
  1083. * after locking the tree.
  1084. */
  1085. prealloc = alloc_extent_state(GFP_NOFS);
  1086. if (!prealloc && !first_iteration)
  1087. return -ENOMEM;
  1088. }
  1089. spin_lock(&tree->lock);
  1090. if (cached_state && *cached_state) {
  1091. state = *cached_state;
  1092. if (state->start <= start && state->end > start &&
  1093. extent_state_in_tree(state))
  1094. goto hit_next;
  1095. }
  1096. /*
  1097. * This search will find all the extents that end after our range
  1098. * starts.
  1099. */
  1100. state = tree_search_for_insert(tree, start, &p, &parent);
  1101. if (!state) {
  1102. prealloc = alloc_extent_state_atomic(prealloc);
  1103. if (!prealloc) {
  1104. err = -ENOMEM;
  1105. goto out;
  1106. }
  1107. prealloc->start = start;
  1108. prealloc->end = end;
  1109. insert_state_fast(tree, prealloc, p, parent, bits, NULL);
  1110. cache_state(prealloc, cached_state);
  1111. prealloc = NULL;
  1112. goto out;
  1113. }
  1114. hit_next:
  1115. last_start = state->start;
  1116. last_end = state->end;
  1117. /*
  1118. * | ---- desired range ---- |
  1119. * | state |
  1120. *
  1121. * Just lock what we found and keep going.
  1122. */
  1123. if (state->start == start && state->end <= end) {
  1124. set_state_bits(tree, state, bits, NULL);
  1125. cache_state(state, cached_state);
  1126. state = clear_state_bit(tree, state, clear_bits, 0, NULL);
  1127. if (last_end == (u64)-1)
  1128. goto out;
  1129. start = last_end + 1;
  1130. if (start < end && state && state->start == start &&
  1131. !need_resched())
  1132. goto hit_next;
  1133. goto search_again;
  1134. }
  1135. /*
  1136. * | ---- desired range ---- |
  1137. * | state |
  1138. * or
  1139. * | ------------- state -------------- |
  1140. *
  1141. * We need to split the extent we found, and may flip bits on second
  1142. * half.
  1143. *
  1144. * If the extent we found extends past our range, we just split and
  1145. * search again. It'll get split again the next time though.
  1146. *
  1147. * If the extent we found is inside our range, we set the desired bit
  1148. * on it.
  1149. */
  1150. if (state->start < start) {
  1151. prealloc = alloc_extent_state_atomic(prealloc);
  1152. if (!prealloc) {
  1153. err = -ENOMEM;
  1154. goto out;
  1155. }
  1156. err = split_state(tree, state, prealloc, start);
  1157. if (err)
  1158. extent_io_tree_panic(tree, err);
  1159. prealloc = NULL;
  1160. if (err)
  1161. goto out;
  1162. if (state->end <= end) {
  1163. set_state_bits(tree, state, bits, NULL);
  1164. cache_state(state, cached_state);
  1165. state = clear_state_bit(tree, state, clear_bits, 0, NULL);
  1166. if (last_end == (u64)-1)
  1167. goto out;
  1168. start = last_end + 1;
  1169. if (start < end && state && state->start == start &&
  1170. !need_resched())
  1171. goto hit_next;
  1172. }
  1173. goto search_again;
  1174. }
  1175. /*
  1176. * | ---- desired range ---- |
  1177. * | state | or | state |
  1178. *
  1179. * There's a hole, we need to insert something in it and ignore the
  1180. * extent we found.
  1181. */
  1182. if (state->start > start) {
  1183. u64 this_end;
  1184. if (end < last_start)
  1185. this_end = end;
  1186. else
  1187. this_end = last_start - 1;
  1188. prealloc = alloc_extent_state_atomic(prealloc);
  1189. if (!prealloc) {
  1190. err = -ENOMEM;
  1191. goto out;
  1192. }
  1193. /*
  1194. * Avoid to free 'prealloc' if it can be merged with the later
  1195. * extent.
  1196. */
  1197. prealloc->start = start;
  1198. prealloc->end = this_end;
  1199. err = insert_state(tree, prealloc, bits, NULL);
  1200. if (err)
  1201. extent_io_tree_panic(tree, err);
  1202. cache_state(prealloc, cached_state);
  1203. prealloc = NULL;
  1204. start = this_end + 1;
  1205. goto search_again;
  1206. }
  1207. /*
  1208. * | ---- desired range ---- |
  1209. * | state |
  1210. *
  1211. * We need to split the extent, and set the bit on the first half.
  1212. */
  1213. if (state->start <= end && state->end > end) {
  1214. prealloc = alloc_extent_state_atomic(prealloc);
  1215. if (!prealloc) {
  1216. err = -ENOMEM;
  1217. goto out;
  1218. }
  1219. err = split_state(tree, state, prealloc, end + 1);
  1220. if (err)
  1221. extent_io_tree_panic(tree, err);
  1222. set_state_bits(tree, prealloc, bits, NULL);
  1223. cache_state(prealloc, cached_state);
  1224. clear_state_bit(tree, prealloc, clear_bits, 0, NULL);
  1225. prealloc = NULL;
  1226. goto out;
  1227. }
  1228. search_again:
  1229. if (start > end)
  1230. goto out;
  1231. spin_unlock(&tree->lock);
  1232. cond_resched();
  1233. first_iteration = false;
  1234. goto again;
  1235. out:
  1236. spin_unlock(&tree->lock);
  1237. if (prealloc)
  1238. free_extent_state(prealloc);
  1239. return err;
  1240. }
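/*
 * Illustrative call matching the DELALLOC -> DIRTY example above (the
 * arguments are placeholders):
 *
 *	convert_extent_bit(tree, start, end, EXTENT_DIRTY, EXTENT_DELALLOC,
 *			   &cached_state);
 */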
  1241. /*
  1242. * Find the first range that has @bits not set. This range could start before
  1243. * @start.
  1244. *
  1245. * @tree: the tree to search
  1246. * @start: offset at/after which the found extent should start
  1247. * @start_ret: records the beginning of the range
  1248. * @end_ret: records the end of the range (inclusive)
  1249. * @bits: the set of bits which must be unset
  1250. *
  1251. * Since unallocated range is also considered one which doesn't have the bits
  1252. * set it's possible that @end_ret contains -1, this happens in case the range
  1253. * spans (last_range_end, end of device]. In this case it's up to the caller to
  1254. * trim @end_ret to the appropriate size.
  1255. */
  1256. void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
  1257. u64 *start_ret, u64 *end_ret, u32 bits)
  1258. {
  1259. struct extent_state *state;
  1260. struct extent_state *prev = NULL, *next;
  1261. spin_lock(&tree->lock);
  1262. /* Find first extent with bits cleared */
  1263. while (1) {
  1264. state = tree_search_prev_next(tree, start, &prev, &next);
  1265. if (!state && !next && !prev) {
  1266. /*
  1267. * Tree is completely empty, send full range and let
  1268. * caller deal with it
  1269. */
  1270. *start_ret = 0;
  1271. *end_ret = -1;
  1272. goto out;
  1273. } else if (!state && !next) {
  1274. /*
  1275. * We are past the last allocated chunk, set start at
  1276. * the end of the last extent.
  1277. */
  1278. *start_ret = prev->end + 1;
  1279. *end_ret = -1;
  1280. goto out;
  1281. } else if (!state) {
  1282. state = next;
  1283. }
  1284. /*
  1285. * At this point 'state' either contains 'start' or start is
  1286. * before 'state'
  1287. */
  1288. if (in_range(start, state->start, state->end - state->start + 1)) {
  1289. if (state->state & bits) {
  1290. /*
  1291. * |--range with bits sets--|
  1292. * |
  1293. * start
  1294. */
  1295. start = state->end + 1;
  1296. } else {
  1297. /*
  1298. * 'start' falls within a range that doesn't
  1299. * have the bits set, so take its start as the
  1300. * beginning of the desired range
  1301. *
  1302. * |--range with bits cleared----|
  1303. * |
  1304. * start
  1305. */
  1306. *start_ret = state->start;
  1307. break;
  1308. }
  1309. } else {
  1310. /*
  1311. * |---prev range---|---hole/unset---|---node range---|
  1312. * |
  1313. * start
  1314. *
  1315. * or
  1316. *
  1317. * |---hole/unset--||--first node--|
  1318. * 0 |
  1319. * start
  1320. */
  1321. if (prev)
  1322. *start_ret = prev->end + 1;
  1323. else
  1324. *start_ret = 0;
  1325. break;
  1326. }
  1327. }
  1328. /*
  1329. * Find the longest stretch from start until an entry which has the
  1330. * bits set
  1331. */
  1332. while (state) {
  1333. if (state->end >= start && !(state->state & bits)) {
  1334. *end_ret = state->end;
  1335. } else {
  1336. *end_ret = state->start - 1;
  1337. break;
  1338. }
  1339. state = next_state(state);
  1340. }
  1341. out:
  1342. spin_unlock(&tree->lock);
  1343. }
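/*
 * For illustration: if the tree contains a single state [4096, 8191] with
 * @bits set, then a @start of 0 yields *start_ret == 0 and *end_ret == 4095,
 * while a @start of 10000 yields *start_ret == 8192 and *end_ret == -1 (no
 * further state limits the range).
 */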
  1344. /*
  1345. * Count the number of bytes in the tree that have a given bit(s) set. This
  1346. * can be fairly slow, except for EXTENT_DIRTY which is cached. The total
  1347. * number found is returned.
  1348. */
  1349. u64 count_range_bits(struct extent_io_tree *tree,
  1350. u64 *start, u64 search_end, u64 max_bytes,
  1351. u32 bits, int contig)
  1352. {
  1353. struct extent_state *state;
  1354. u64 cur_start = *start;
  1355. u64 total_bytes = 0;
  1356. u64 last = 0;
  1357. int found = 0;
  1358. if (WARN_ON(search_end < cur_start))
  1359. return 0;
  1360. spin_lock(&tree->lock);
  1361. /*
  1362. * This search will find all the extents that end after our range
  1363. * starts.
  1364. */
  1365. state = tree_search(tree, cur_start);
  1366. while (state) {
  1367. if (state->start > search_end)
  1368. break;
  1369. if (contig && found && state->start > last + 1)
  1370. break;
  1371. if (state->end >= cur_start && (state->state & bits) == bits) {
  1372. total_bytes += min(search_end, state->end) + 1 -
  1373. max(cur_start, state->start);
  1374. if (total_bytes >= max_bytes)
  1375. break;
  1376. if (!found) {
  1377. *start = max(cur_start, state->start);
  1378. found = 1;
  1379. }
  1380. last = state->end;
  1381. } else if (contig && found) {
  1382. break;
  1383. }
  1384. state = next_state(state);
  1385. }
  1386. spin_unlock(&tree->lock);
  1387. return total_bytes;
  1388. }
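/*
 * For illustration: with @bits set on [0, 4095] and [8192, 12287] only, a
 * non-contiguous count over [0, 16383] returns 8192 and sets *start to 0,
 * while the same call with 'contig' set stops at the hole and returns 4096.
 */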
  1389. /*
  1390. * Search a range in the state tree for a given mask. If 'filled' == 1, this
  1391. * returns 1 only if every extent in the range has the bits set. Otherwise, 1
  1392. * is returned if any bit in the range is found set.
  1393. */
  1394. int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
  1395. u32 bits, int filled, struct extent_state *cached)
  1396. {
  1397. struct extent_state *state = NULL;
  1398. int bitset = 0;
  1399. spin_lock(&tree->lock);
  1400. if (cached && extent_state_in_tree(cached) && cached->start <= start &&
  1401. cached->end > start)
  1402. state = cached;
  1403. else
  1404. state = tree_search(tree, start);
  1405. while (state && start <= end) {
  1406. if (filled && state->start > start) {
  1407. bitset = 0;
  1408. break;
  1409. }
  1410. if (state->start > end)
  1411. break;
  1412. if (state->state & bits) {
  1413. bitset = 1;
  1414. if (!filled)
  1415. break;
  1416. } else if (filled) {
  1417. bitset = 0;
  1418. break;
  1419. }
  1420. if (state->end == (u64)-1)
  1421. break;
  1422. start = state->end + 1;
  1423. if (start > end)
  1424. break;
  1425. state = next_state(state);
  1426. }
  1427. /* We ran out of states and were still inside of our range. */
  1428. if (filled && !state)
  1429. bitset = 0;
  1430. spin_unlock(&tree->lock);
  1431. return bitset;
  1432. }
  1433. /* Wrappers around set/clear extent bit */
  1434. int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
  1435. u32 bits, struct extent_changeset *changeset)
  1436. {
  1437. /*
  1438. * We don't support EXTENT_LOCKED yet, as current changeset will
  1439. * record any bits changed, so for EXTENT_LOCKED case, it will
  1440. * either fail with -EEXIST or changeset will record the whole
  1441. * range.
  1442. */
  1443. ASSERT(!(bits & EXTENT_LOCKED));
  1444. return __set_extent_bit(tree, start, end, bits, NULL, NULL, changeset,
  1445. GFP_NOFS);
  1446. }
  1447. int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
  1448. u32 bits, struct extent_changeset *changeset)
  1449. {
  1450. /*
  1451. * Don't support EXTENT_LOCKED case, same reason as
  1452. * set_record_extent_bits().
  1453. */
  1454. ASSERT(!(bits & EXTENT_LOCKED));
  1455. return __clear_extent_bit(tree, start, end, bits, NULL, GFP_NOFS,
  1456. changeset);
  1457. }
  1458. int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
  1459. {
  1460. int err;
  1461. u64 failed_start;
  1462. err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
  1463. NULL, NULL, GFP_NOFS);
  1464. if (err == -EEXIST) {
  1465. if (failed_start > start)
  1466. clear_extent_bit(tree, start, failed_start - 1,
  1467. EXTENT_LOCKED, NULL);
  1468. return 0;
  1469. }
  1470. return 1;
  1471. }
  1472. /*
  1473. * Either insert or lock the state struct between start and end, waiting until
  1474. * any conflicting EXTENT_LOCKED range has been cleared.
  1475. */
  1476. int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
  1477. struct extent_state **cached_state)
  1478. {
  1479. int err;
  1480. u64 failed_start;
  1481. err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
  1482. cached_state, NULL, GFP_NOFS);
  1483. while (err == -EEXIST) {
  1484. if (failed_start != start)
  1485. clear_extent_bit(tree, start, failed_start - 1,
  1486. EXTENT_LOCKED, cached_state);
  1487. wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
  1488. err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
  1489. &failed_start, cached_state, NULL,
  1490. GFP_NOFS);
  1491. }
  1492. return err;
  1493. }
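/*
 * Sketch of the lock/unlock pattern (the unlock helper is declared in the
 * header and ultimately clears EXTENT_LOCKED; treat its exact name and
 * signature as an assumption of this sketch):
 *
 *	struct extent_state *cached = NULL;
 *
 *	lock_extent(tree, start, end, &cached);
 *	...
 *	unlock_extent(tree, start, end, &cached);
 */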
  1494. void __cold extent_state_free_cachep(void)
  1495. {
  1496. btrfs_extent_state_leak_debug_check();
  1497. kmem_cache_destroy(extent_state_cache);
  1498. }
  1499. int __init extent_state_init_cachep(void)
  1500. {
  1501. extent_state_cache = kmem_cache_create("btrfs_extent_state",
  1502. sizeof(struct extent_state), 0,
  1503. SLAB_MEM_SPREAD, NULL);
  1504. if (!extent_state_cache)
  1505. return -ENOMEM;
  1506. return 0;
  1507. }