// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_log_format.h"
  11. #include "xfs_trans_resv.h"
  12. #include "xfs_bit.h"
  13. #include "xfs_mount.h"
  14. #include "xfs_trans.h"
  15. #include "xfs_trans_priv.h"
  16. #include "xfs_buf_item.h"
  17. #include "xfs_inode.h"
  18. #include "xfs_inode_item.h"
  19. #include "xfs_quota.h"
  20. #include "xfs_dquot_item.h"
  21. #include "xfs_dquot.h"
  22. #include "xfs_trace.h"
  23. #include "xfs_log.h"
  24. #include "xfs_log_priv.h"
/* Slab cache for struct xfs_buf_log_item allocations (see xfs_buf_item_init). */
struct kmem_cache	*xfs_buf_item_cache;
/* Convert a generic log item to its containing buf log item. */
static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_buf_log_item, bli_item);
}
  30. /* Is this log iovec plausibly large enough to contain the buffer log format? */
  31. bool
  32. xfs_buf_log_check_iovec(
  33. struct xfs_log_iovec *iovec)
  34. {
  35. struct xfs_buf_log_format *blfp = iovec->i_addr;
  36. char *bmp_end;
  37. char *item_end;
  38. if (offsetof(struct xfs_buf_log_format, blf_data_map) > iovec->i_len)
  39. return false;
  40. item_end = (char *)iovec->i_addr + iovec->i_len;
  41. bmp_end = (char *)&blfp->blf_data_map[blfp->blf_map_size];
  42. return bmp_end <= item_end;
  43. }
  44. static inline int
  45. xfs_buf_log_format_size(
  46. struct xfs_buf_log_format *blfp)
  47. {
  48. return offsetof(struct xfs_buf_log_format, blf_data_map) +
  49. (blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
  50. }
  51. static inline bool
  52. xfs_buf_item_straddle(
  53. struct xfs_buf *bp,
  54. uint offset,
  55. int first_bit,
  56. int nbits)
  57. {
  58. void *first, *last;
  59. first = xfs_buf_offset(bp, offset + (first_bit << XFS_BLF_SHIFT));
  60. last = xfs_buf_offset(bp,
  61. offset + ((first_bit + nbits) << XFS_BLF_SHIFT));
  62. if (last - first != nbits * XFS_BLF_CHUNK)
  63. return true;
  64. return false;
  65. }
/*
 * Return the number of log iovecs and space needed to log the given buf log
 * item segment.
 *
 * It calculates this as 1 iovec for the buf log format structure and 1 for each
 * stretch of non-contiguous chunks to be logged.  Contiguous chunks are logged
 * in a single iovec.
 */
STATIC void
xfs_buf_item_size_segment(
	struct xfs_buf_log_item	*bip,
	struct xfs_buf_log_format *blfp,	/* dirty bitmap for this segment */
	uint			offset,		/* byte offset of segment in buffer */
	int			*nvecs,		/* in/out: accumulated iovec count */
	int			*nbytes)	/* in/out: accumulated byte count */
{
	struct xfs_buf		*bp = bip->bli_buf;
	int			first_bit;
	int			nbits;
	int			next_bit;
	int			last_bit;

	/* No dirty chunks in this segment: it contributes nothing at all. */
	first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
	if (first_bit == -1)
		return;

	/* One iovec for the buf log format structure itself. */
	(*nvecs)++;
	*nbytes += xfs_buf_log_format_size(blfp);

	/* Fast path: count each contiguous run of dirty chunks as one iovec. */
	do {
		nbits = xfs_contig_bits(blfp->blf_data_map,
					blfp->blf_map_size, first_bit);
		ASSERT(nbits > 0);

		/*
		 * Straddling a page is rare because we don't log contiguous
		 * chunks of unmapped buffers anywhere.
		 */
		if (nbits > 1 &&
		    xfs_buf_item_straddle(bp, offset, first_bit, nbits))
			goto slow_scan;

		(*nvecs)++;
		*nbytes += nbits * XFS_BLF_CHUNK;

		/*
		 * This takes the bit number to start looking from and
		 * returns the next set bit from there.  It returns -1
		 * if there are no more bits set or the start bit is
		 * beyond the end of the bitmap.
		 */
		first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
					(uint)first_bit + nbits + 1);
	} while (first_bit != -1);

	return;

slow_scan:
	/* Count the first bit we jumped out of the above loop from */
	(*nvecs)++;
	*nbytes += XFS_BLF_CHUNK;
	last_bit = first_bit;
	while (last_bit != -1) {
		/*
		 * This takes the bit number to start looking from and
		 * returns the next set bit from there.  It returns -1
		 * if there are no more bits set or the start bit is
		 * beyond the end of the bitmap.
		 */
		next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
					last_bit + 1);
		/*
		 * If we run out of bits, leave the loop,
		 * else if we find a new set of bits bump the number of vecs,
		 * else keep scanning the current set of bits.
		 */
		if (next_bit == -1) {
			break;
		} else if (next_bit != last_bit + 1 ||
			   xfs_buf_item_straddle(bp, offset, first_bit, nbits)) {
			last_bit = next_bit;
			first_bit = next_bit;
			(*nvecs)++;
			nbits = 1;
		} else {
			last_bit++;
			nbits++;
		}
		*nbytes += XFS_BLF_CHUNK;
	}
}
/*
 * Return the number of log iovecs and space needed to log the given buf log
 * item.
 *
 * Discontiguous buffers need a format structure per region that is being
 * logged. This makes the changes in the buffer appear to log recovery as though
 * they came from separate buffers, just like would occur if multiple buffers
 * were used instead of a single discontiguous buffer. This enables
 * discontiguous buffers to be in-memory constructs, completely transparent to
 * what ends up on disk.
 *
 * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
 * format structures. If the item has previously been logged and has dirty
 * regions, we do not relog them in stale buffers. This has the effect of
 * reducing the size of the relogged item by the amount of dirty data tracked
 * by the log item. This can result in the committing transaction reducing the
 * amount of space being consumed by the CIL.
 */
STATIC void
xfs_buf_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,		/* out: iovec count */
	int			*nbytes)	/* out: byte count */
{
	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
	struct xfs_buf		*bp = bip->bli_buf;
	int			i;
	int			bytes;
	uint			offset = 0;

	ASSERT(atomic_read(&bip->bli_refcount) > 0);
	if (bip->bli_flags & XFS_BLI_STALE) {
		/*
		 * The buffer is stale, so all we need to log is the buf log
		 * format structure with the cancel flag in it as we are never
		 * going to replay the changes tracked in the log item.
		 */
		trace_xfs_buf_item_size_stale(bip);
		ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
		*nvecs += bip->bli_format_count;
		for (i = 0; i < bip->bli_format_count; i++) {
			*nbytes += xfs_buf_log_format_size(&bip->bli_formats[i]);
		}
		return;
	}

	ASSERT(bip->bli_flags & XFS_BLI_LOGGED);

	if (bip->bli_flags & XFS_BLI_ORDERED) {
		/*
		 * The buffer has been logged just to order it. It is not being
		 * included in the transaction commit, so no vectors are used at
		 * all.
		 */
		trace_xfs_buf_item_size_ordered(bip);
		*nvecs = XFS_LOG_VEC_ORDERED;
		return;
	}

	/*
	 * The vector count is based on the number of buffer vectors we have
	 * dirty bits in. This will only be greater than one when we have a
	 * compound buffer with more than one segment dirty. Hence for compound
	 * buffers we need to track which segment the dirty bits correspond to,
	 * and when we move from one segment to the next increment the vector
	 * count for the extra buf log format structure that will need to be
	 * written.
	 */
	bytes = 0;
	for (i = 0; i < bip->bli_format_count; i++) {
		xfs_buf_item_size_segment(bip, &bip->bli_formats[i], offset,
					  nvecs, &bytes);
		offset += BBTOB(bp->b_maps[i].bm_len);
	}

	/*
	 * Round up the buffer size required to minimise the number of memory
	 * allocations that need to be done as this item grows when relogged by
	 * repeated modifications.
	 */
	*nbytes = round_up(bytes, 512);
	trace_xfs_buf_item_size(bip);
}
  227. static inline void
  228. xfs_buf_item_copy_iovec(
  229. struct xfs_log_vec *lv,
  230. struct xfs_log_iovec **vecp,
  231. struct xfs_buf *bp,
  232. uint offset,
  233. int first_bit,
  234. uint nbits)
  235. {
  236. offset += first_bit * XFS_BLF_CHUNK;
  237. xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK,
  238. xfs_buf_offset(bp, offset),
  239. nbits * XFS_BLF_CHUNK);
  240. }
/*
 * Format one buffer segment: emit an XLOG_REG_TYPE_BFORMAT iovec for the
 * segment's buf log format structure, then one XLOG_REG_TYPE_BCHUNK iovec per
 * run of contiguous dirty chunks.  blf_size in the copied-out format header is
 * updated to the total number of iovecs emitted for this segment.
 */
static void
xfs_buf_item_format_segment(
	struct xfs_buf_log_item	*bip,
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp,
	uint			offset,		/* byte offset of segment in buffer */
	struct xfs_buf_log_format *blfp)
{
	struct xfs_buf		*bp = bip->bli_buf;
	uint			base_size;
	int			first_bit;
	int			last_bit;
	int			next_bit;
	uint			nbits;

	/* copy the flags across from the base format item */
	blfp->blf_flags = bip->__bli_format.blf_flags;

	/*
	 * Base size is the actual size of the ondisk structure - it reflects
	 * the actual size of the dirty bitmap rather than the size of the in
	 * memory structure.
	 */
	base_size = xfs_buf_log_format_size(blfp);

	first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
	if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) {
		/*
		 * If the map is not be dirty in the transaction, mark
		 * the size as zero and do not advance the vector pointer.
		 */
		return;
	}

	blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size);
	blfp->blf_size = 1;

	if (bip->bli_flags & XFS_BLI_STALE) {
		/*
		 * The buffer is stale, so all we need to log
		 * is the buf log format structure with the
		 * cancel flag in it.
		 */
		trace_xfs_buf_item_format_stale(bip);
		ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
		return;
	}

	/*
	 * Fill in an iovec for each set of contiguous chunks.
	 */
	do {
		ASSERT(first_bit >= 0);
		nbits = xfs_contig_bits(blfp->blf_data_map,
					blfp->blf_map_size, first_bit);
		ASSERT(nbits > 0);

		/*
		 * Straddling a page is rare because we don't log contiguous
		 * chunks of unmapped buffers anywhere.
		 */
		if (nbits > 1 &&
		    xfs_buf_item_straddle(bp, offset, first_bit, nbits))
			goto slow_scan;

		xfs_buf_item_copy_iovec(lv, vecp, bp, offset,
					first_bit, nbits);
		blfp->blf_size++;

		/*
		 * This takes the bit number to start looking from and
		 * returns the next set bit from there.  It returns -1
		 * if there are no more bits set or the start bit is
		 * beyond the end of the bitmap.
		 */
		first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
					(uint)first_bit + nbits + 1);
	} while (first_bit != -1);

	return;

slow_scan:
	/* Slow path: buffer is unmapped; walk the bitmap bit by bit. */
	ASSERT(bp->b_addr == NULL);
	last_bit = first_bit;
	nbits = 1;
	for (;;) {
		/*
		 * This takes the bit number to start looking from and
		 * returns the next set bit from there.  It returns -1
		 * if there are no more bits set or the start bit is
		 * beyond the end of the bitmap.
		 */
		next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
					(uint)last_bit + 1);
		/*
		 * If we run out of bits fill in the last iovec and get out of
		 * the loop. Else if we start a new set of bits then fill in
		 * the iovec for the series we were looking at and start
		 * counting the bits in the new one. Else we're still in the
		 * same set of bits so just keep counting and scanning.
		 */
		if (next_bit == -1) {
			xfs_buf_item_copy_iovec(lv, vecp, bp, offset,
						first_bit, nbits);
			blfp->blf_size++;
			break;
		} else if (next_bit != last_bit + 1 ||
			   xfs_buf_item_straddle(bp, offset, first_bit, nbits)) {
			xfs_buf_item_copy_iovec(lv, vecp, bp, offset,
						first_bit, nbits);
			blfp->blf_size++;
			first_bit = next_bit;
			last_bit = next_bit;
			nbits = 1;
		} else {
			last_bit++;
			nbits++;
		}
	}
}
/*
 * This is called to fill in the vector of log iovecs for the
 * given log buf item.  It fills the first entry with a buf log
 * format structure, and the rest point to contiguous chunks
 * within the buffer.
 */
STATIC void
xfs_buf_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
	struct xfs_buf		*bp = bip->bli_buf;
	struct xfs_log_iovec	*vecp = NULL;
	uint			offset = 0;
	int			i;

	ASSERT(atomic_read(&bip->bli_refcount) > 0);
	ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
	       (bip->bli_flags & XFS_BLI_STALE));
	ASSERT((bip->bli_flags & XFS_BLI_STALE) ||
	       (xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF
	        && xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF));
	ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED) ||
	       (bip->bli_flags & XFS_BLI_STALE));

	/*
	 * If it is an inode buffer, transfer the in-memory state to the
	 * format flags and clear the in-memory state.
	 *
	 * For buffer based inode allocation, we do not transfer
	 * this state if the inode buffer allocation has not yet been committed
	 * to the log as setting the XFS_BLI_INODE_BUF flag will prevent
	 * correct replay of the inode allocation.
	 *
	 * For icreate item based inode allocation, the buffers aren't written
	 * to the journal during allocation, and hence we should always tag the
	 * buffer as an inode buffer so that the correct unlinked list replay
	 * occurs during recovery.
	 */
	if (bip->bli_flags & XFS_BLI_INODE_BUF) {
		if (xfs_has_v3inodes(lip->li_log->l_mp) ||
		    !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
		      xfs_log_item_in_current_chkpt(lip)))
			bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
		bip->bli_flags &= ~XFS_BLI_INODE_BUF;
	}

	/* Format each segment; offset advances by each map's length in bytes. */
	for (i = 0; i < bip->bli_format_count; i++) {
		xfs_buf_item_format_segment(bip, lv, &vecp, offset,
					    &bip->bli_formats[i]);
		offset += BBTOB(bp->b_maps[i].bm_len);
	}

	/*
	 * Check to make sure everything is consistent.
	 */
	trace_xfs_buf_item_format(bip);
}
/*
 * This is called to pin the buffer associated with the buf log item in memory
 * so it cannot be written out.
 *
 * We also always take a reference to the buffer log item here so that the bli
 * is held while the item is pinned in memory. This means that we can
 * unconditionally drop the reference count a transaction holds when the
 * transaction is completed.
 */
STATIC void
xfs_buf_item_pin(
	struct xfs_log_item	*lip)
{
	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);

	ASSERT(atomic_read(&bip->bli_refcount) > 0);
	ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
	       (bip->bli_flags & XFS_BLI_ORDERED) ||
	       (bip->bli_flags & XFS_BLI_STALE));

	trace_xfs_buf_item_pin(bip);

	/* bli reference first, then the buffer pin count itself. */
	atomic_inc(&bip->bli_refcount);
	atomic_inc(&bip->bli_buf->b_pin_count);
}
/*
 * This is called to unpin the buffer associated with the buf log item which
 * was previously pinned with a call to xfs_buf_item_pin().
 *
 * @remove is set when the unpin is the result of a transaction abort and the
 * item must be removed; in that case an async I/O failure is simulated on the
 * buffer.
 */
STATIC void
xfs_buf_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
	struct xfs_buf		*bp = bip->bli_buf;
	int			stale = bip->bli_flags & XFS_BLI_STALE;
	int			freed;

	ASSERT(bp->b_log_item == bip);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	trace_xfs_buf_item_unpin(bip);

	/*
	 * Drop the bli ref associated with the pin and grab the hold required
	 * for the I/O simulation failure in the abort case. We have to do this
	 * before the pin count drops because the AIL doesn't acquire a bli
	 * reference. Therefore if the refcount drops to zero, the bli could
	 * still be AIL resident and the buffer submitted for I/O (and freed on
	 * completion) at any point before we return. This can be removed once
	 * the AIL properly holds a reference on the bli.
	 */
	freed = atomic_dec_and_test(&bip->bli_refcount);
	if (freed && !stale && remove)
		xfs_buf_hold(bp);
	if (atomic_dec_and_test(&bp->b_pin_count))
		wake_up_all(&bp->b_waiters);

	/* nothing to do but drop the pin count if the bli is active */
	if (!freed)
		return;

	if (stale) {
		ASSERT(bip->bli_flags & XFS_BLI_STALE);
		ASSERT(xfs_buf_islocked(bp));
		ASSERT(bp->b_flags & XBF_STALE);
		ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
		ASSERT(list_empty(&lip->li_trans));
		ASSERT(!bp->b_transp);

		trace_xfs_buf_item_unpin_stale(bip);

		/*
		 * If we get called here because of an IO error, we may or may
		 * not have the item on the AIL. xfs_trans_ail_delete() will
		 * take care of that situation. xfs_trans_ail_delete() drops
		 * the AIL lock.
		 */
		if (bip->bli_flags & XFS_BLI_STALE_INODE) {
			xfs_buf_item_done(bp);
			xfs_buf_inode_iodone(bp);
			ASSERT(list_empty(&bp->b_li_list));
		} else {
			xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
			xfs_buf_item_relse(bp);
			ASSERT(bp->b_log_item == NULL);
		}
		xfs_buf_relse(bp);
	} else if (remove) {
		/*
		 * The buffer must be locked and held by the caller to simulate
		 * an async I/O failure. We acquired the hold for this case
		 * before the buffer was unpinned.
		 */
		xfs_buf_lock(bp);
		bp->b_flags |= XBF_ASYNC;
		xfs_buf_ioend_fail(bp);
	}
}
/*
 * AIL push handler for buffer log items: try to lock the buffer and queue it
 * for delayed write, reporting XFS_ITEM_PINNED/LOCKED/FLUSHING/SUCCESS back to
 * xfsaild.
 */
STATIC uint
xfs_buf_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
{
	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
	struct xfs_buf		*bp = bip->bli_buf;
	uint			rval = XFS_ITEM_SUCCESS;

	if (xfs_buf_ispinned(bp))
		return XFS_ITEM_PINNED;
	if (!xfs_buf_trylock(bp)) {
		/*
		 * If we have just raced with a buffer being pinned and it has
		 * been marked stale, we could end up stalling until someone else
		 * issues a log force to unpin the stale buffer. Check for the
		 * race condition here so xfsaild recognizes the buffer is pinned
		 * and queues a log force to move it along.
		 */
		if (xfs_buf_ispinned(bp))
			return XFS_ITEM_PINNED;
		return XFS_ITEM_LOCKED;
	}

	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));

	trace_xfs_buf_item_push(bip);

	/* has a previous flush failed due to IO errors? */
	if (bp->b_flags & XBF_WRITE_FAIL) {
		xfs_buf_alert_ratelimited(bp, "XFS: Failing async write",
	    "Failing async write on buffer block 0x%llx. Retrying async write.",
					  (long long)xfs_buf_daddr(bp));
	}

	if (!xfs_buf_delwri_queue(bp, buffer_list))
		rval = XFS_ITEM_FLUSHING;
	xfs_buf_unlock(bp);
	return rval;
}
/*
 * Drop the buffer log item refcount and take appropriate action. This helper
 * determines whether the bli must be freed or not, since a decrement to zero
 * does not necessarily mean the bli is unused.
 *
 * Return true if the bli is freed, false otherwise.
 */
bool
xfs_buf_item_put(
	struct xfs_buf_log_item	*bip)
{
	struct xfs_log_item	*lip = &bip->bli_item;
	bool			aborted;
	bool			dirty;

	/* drop the bli ref and return if it wasn't the last one */
	if (!atomic_dec_and_test(&bip->bli_refcount))
		return false;

	/*
	 * We dropped the last ref and must free the item if clean or aborted.
	 * If the bli is dirty and non-aborted, the buffer was clean in the
	 * transaction but still awaiting writeback from previous changes. In
	 * that case, the bli is freed on buffer writeback completion.
	 */
	aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
		  xlog_is_shutdown(lip->li_log);
	dirty = bip->bli_flags & XFS_BLI_DIRTY;
	if (dirty && !aborted)
		return false;

	/*
	 * The bli is aborted or clean. An aborted item may be in the AIL
	 * regardless of dirty state.  For example, consider an aborted
	 * transaction that invalidated a dirty bli and cleared the dirty
	 * state.
	 */
	if (aborted)
		xfs_trans_ail_delete(lip, 0);
	xfs_buf_item_relse(bip->bli_buf);
	return true;
}
/*
 * Release the buffer associated with the buf log item.  If there is no dirty
 * logged data associated with the buffer recorded in the buf log item, then
 * free the buf log item and remove the reference to it in the buffer.
 *
 * This call ignores the recursion count.  It is only called when the buffer
 * should REALLY be unlocked, regardless of the recursion count.
 *
 * We unconditionally drop the transaction's reference to the log item. If the
 * item was logged, then another reference was taken when it was pinned, so we
 * can safely drop the transaction reference now.  This also allows us to avoid
 * potential races with the unpin code freeing the bli by not referencing the
 * bli after we've dropped the reference count.
 *
 * If the XFS_BLI_HOLD flag is set in the buf log item, then free the log item
 * if necessary but do not unlock the buffer.  This is for support of
 * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't
 * free the item.
 */
STATIC void
xfs_buf_item_release(
	struct xfs_log_item	*lip)
{
	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
	struct xfs_buf		*bp = bip->bli_buf;
	bool			released;
	bool			hold = bip->bli_flags & XFS_BLI_HOLD;
	bool			stale = bip->bli_flags & XFS_BLI_STALE;
#if defined(DEBUG) || defined(XFS_WARN)
	bool			ordered = bip->bli_flags & XFS_BLI_ORDERED;
	bool			dirty = bip->bli_flags & XFS_BLI_DIRTY;
	bool			aborted = test_bit(XFS_LI_ABORTED,
						   &lip->li_flags);
#endif

	trace_xfs_buf_item_release(bip);

	/*
	 * The bli dirty state should match whether the blf has logged segments
	 * except for ordered buffers, where only the bli should be dirty.
	 */
	ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) ||
	       (ordered && dirty && !xfs_buf_item_dirty_format(bip)));
	ASSERT(!stale || (bip->__bli_format.blf_flags & XFS_BLF_CANCEL));

	/*
	 * Clear the buffer's association with this transaction and
	 * per-transaction state from the bli, which has been copied above.
	 */
	bp->b_transp = NULL;
	bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);

	/*
	 * Unref the item and unlock the buffer unless held or stale. Stale
	 * buffers remain locked until final unpin unless the bli is freed by
	 * the unref call. The latter implies shutdown because buffer
	 * invalidation dirties the bli and transaction.
	 */
	released = xfs_buf_item_put(bip);
	if (hold || (stale && !released))
		return;
	ASSERT(!stale || aborted);
	xfs_buf_relse(bp);
}
  629. STATIC void
  630. xfs_buf_item_committing(
  631. struct xfs_log_item *lip,
  632. xfs_csn_t seq)
  633. {
  634. return xfs_buf_item_release(lip);
  635. }
  636. /*
  637. * This is called to find out where the oldest active copy of the
  638. * buf log item in the on disk log resides now that the last log
  639. * write of it completed at the given lsn.
  640. * We always re-log all the dirty data in a buffer, so usually the
  641. * latest copy in the on disk log is the only one that matters. For
  642. * those cases we simply return the given lsn.
  643. *
  644. * The one exception to this is for buffers full of newly allocated
  645. * inodes. These buffers are only relogged with the XFS_BLI_INODE_BUF
  646. * flag set, indicating that only the di_next_unlinked fields from the
  647. * inodes in the buffers will be replayed during recovery. If the
  648. * original newly allocated inode images have not yet been flushed
  649. * when the buffer is so relogged, then we need to make sure that we
  650. * keep the old images in the 'active' portion of the log. We do this
  651. * by returning the original lsn of that transaction here rather than
  652. * the current one.
  653. */
  654. STATIC xfs_lsn_t
  655. xfs_buf_item_committed(
  656. struct xfs_log_item *lip,
  657. xfs_lsn_t lsn)
  658. {
  659. struct xfs_buf_log_item *bip = BUF_ITEM(lip);
  660. trace_xfs_buf_item_committed(bip);
  661. if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && lip->li_lsn != 0)
  662. return lip->li_lsn;
  663. return lsn;
  664. }
/* Log item operations vector for buffer log items. */
static const struct xfs_item_ops xfs_buf_item_ops = {
	.iop_size	= xfs_buf_item_size,
	.iop_format	= xfs_buf_item_format,
	.iop_pin	= xfs_buf_item_pin,
	.iop_unpin	= xfs_buf_item_unpin,
	.iop_release	= xfs_buf_item_release,
	.iop_committing	= xfs_buf_item_committing,
	.iop_committed	= xfs_buf_item_committed,
	.iop_push	= xfs_buf_item_push,
};
  675. STATIC void
  676. xfs_buf_item_get_format(
  677. struct xfs_buf_log_item *bip,
  678. int count)
  679. {
  680. ASSERT(bip->bli_formats == NULL);
  681. bip->bli_format_count = count;
  682. if (count == 1) {
  683. bip->bli_formats = &bip->__bli_format;
  684. return;
  685. }
  686. bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format),
  687. 0);
  688. }
  689. STATIC void
  690. xfs_buf_item_free_format(
  691. struct xfs_buf_log_item *bip)
  692. {
  693. if (bip->bli_formats != &bip->__bli_format) {
  694. kmem_free(bip->bli_formats);
  695. bip->bli_formats = NULL;
  696. }
  697. }
/*
 * Allocate a new buf log item to go with the given buffer.
 * Set the buffer's b_log_item field to point to the new
 * buf log item.
 *
 * Returns 0 on success (including when a bli already exists) or
 * -EFSCORRUPTED if a map is too large for the dirty bitmap to describe.
 */
int
xfs_buf_item_init(
	struct xfs_buf	*bp,
	struct xfs_mount *mp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	int			chunks;
	int			map_size;
	int			i;

	/*
	 * Check to see if there is already a buf log item for
	 * this buffer. If we do already have one, there is
	 * nothing to do here so return.
	 */
	ASSERT(bp->b_mount == mp);
	if (bip) {
		ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
		ASSERT(!bp->b_transp);
		ASSERT(bip->bli_buf == bp);
		return 0;
	}

	bip = kmem_cache_zalloc(xfs_buf_item_cache, GFP_KERNEL | __GFP_NOFAIL);
	xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
	bip->bli_buf = bp;

	/*
	 * chunks is the number of XFS_BLF_CHUNK size pieces the buffer
	 * can be divided into. Make sure not to truncate any pieces.
	 * map_size is the size of the bitmap needed to describe the
	 * chunks of the buffer.
	 *
	 * Discontiguous buffer support follows the layout of the underlying
	 * buffer. This makes the implementation as simple as possible.
	 */
	xfs_buf_item_get_format(bip, bp->b_map_count);

	for (i = 0; i < bip->bli_format_count; i++) {
		chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
				      XFS_BLF_CHUNK);
		map_size = DIV_ROUND_UP(chunks, NBWORD);

		if (map_size > XFS_BLF_DATAMAP_SIZE) {
			kmem_cache_free(xfs_buf_item_cache, bip);
			xfs_err(mp,
	"buffer item dirty bitmap (%u uints) too small to reflect %u bytes!",
					map_size,
					BBTOB(bp->b_maps[i].bm_len));
			return -EFSCORRUPTED;
		}

		bip->bli_formats[i].blf_type = XFS_LI_BUF;
		bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn;
		bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len;
		bip->bli_formats[i].blf_map_size = map_size;
	}

	/* Attach the bli to the buffer and take a buffer reference for it. */
	bp->b_log_item = bip;
	xfs_buf_hold(bp);
	return 0;
}
/*
 * Mark bytes first through last inclusive as dirty in the buf
 * item's bitmap.
 *
 * The byte range is converted to a range of XFS_BLF_CHUNK-sized bits and set
 * in three steps: the partial leading word, any whole words, and the partial
 * trailing word.
 */
static void
xfs_buf_item_log_segment(
	uint			first,	/* first byte to mark dirty */
	uint			last,	/* last byte to mark dirty (inclusive) */
	uint			*map)	/* dirty bitmap for this segment */
{
	uint			first_bit;
	uint			last_bit;
	uint			bits_to_set;
	uint			bits_set;
	uint			word_num;
	uint			*wordp;
	uint			bit;
	uint			end_bit;
	uint			mask;

	ASSERT(first < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD);
	ASSERT(last < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD);

	/*
	 * Convert byte offsets to bit numbers.
	 */
	first_bit = first >> XFS_BLF_SHIFT;
	last_bit = last >> XFS_BLF_SHIFT;

	/*
	 * Calculate the total number of bits to be set.
	 */
	bits_to_set = last_bit - first_bit + 1;

	/*
	 * Get a pointer to the first word in the bitmap
	 * to set a bit in.
	 */
	word_num = first_bit >> BIT_TO_WORD_SHIFT;
	wordp = &map[word_num];

	/*
	 * Calculate the starting bit in the first word.
	 */
	bit = first_bit & (uint)(NBWORD - 1);

	/*
	 * First set any bits in the first word of our range.
	 * If it starts at bit 0 of the word, it will be
	 * set below rather than here.  That is what the variable
	 * bit tells us. The variable bits_set tracks the number
	 * of bits that have been set so far.  End_bit is the number
	 * of the last bit to be set in this word plus one.
	 */
	if (bit) {
		end_bit = min(bit + bits_to_set, (uint)NBWORD);
		mask = ((1U << (end_bit - bit)) - 1) << bit;
		*wordp |= mask;
		wordp++;
		bits_set = end_bit - bit;
	} else {
		bits_set = 0;
	}

	/*
	 * Now set bits a whole word at a time that are between
	 * first_bit and last_bit.
	 */
	while ((bits_to_set - bits_set) >= NBWORD) {
		*wordp = 0xffffffff;
		bits_set += NBWORD;
		wordp++;
	}

	/*
	 * Finally, set any bits left to be set in one last partial word.
	 */
	end_bit = bits_to_set - bits_set;
	if (end_bit) {
		mask = (1U << end_bit) - 1;
		*wordp |= mask;
	}
}
  833. /*
  834. * Mark bytes first through last inclusive as dirty in the buf
  835. * item's bitmap.
  836. */
  837. void
  838. xfs_buf_item_log(
  839. struct xfs_buf_log_item *bip,
  840. uint first,
  841. uint last)
  842. {
  843. int i;
  844. uint start;
  845. uint end;
  846. struct xfs_buf *bp = bip->bli_buf;
  847. /*
  848. * walk each buffer segment and mark them dirty appropriately.
  849. */
  850. start = 0;
  851. for (i = 0; i < bip->bli_format_count; i++) {
  852. if (start > last)
  853. break;
  854. end = start + BBTOB(bp->b_maps[i].bm_len) - 1;
  855. /* skip to the map that includes the first byte to log */
  856. if (first > end) {
  857. start += BBTOB(bp->b_maps[i].bm_len);
  858. continue;
  859. }
  860. /*
  861. * Trim the range to this segment and mark it in the bitmap.
  862. * Note that we must convert buffer offsets to segment relative
  863. * offsets (e.g., the first byte of each segment is byte 0 of
  864. * that segment).
  865. */
  866. if (first < start)
  867. first = start;
  868. if (end > last)
  869. end = last;
  870. xfs_buf_item_log_segment(first - start, end - start,
  871. &bip->bli_formats[i].blf_data_map[0]);
  872. start += BBTOB(bp->b_maps[i].bm_len);
  873. }
  874. }
  875. /*
  876. * Return true if the buffer has any ranges logged/dirtied by a transaction,
  877. * false otherwise.
  878. */
  879. bool
  880. xfs_buf_item_dirty_format(
  881. struct xfs_buf_log_item *bip)
  882. {
  883. int i;
  884. for (i = 0; i < bip->bli_format_count; i++) {
  885. if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
  886. bip->bli_formats[i].blf_map_size))
  887. return true;
  888. }
  889. return false;
  890. }
/*
 * Free a buf log item that has been detached from its buffer: release
 * the format array, the log vector shadow buffer hanging off the log
 * item, and finally the item itself.
 */
STATIC void
xfs_buf_item_free(
	struct xfs_buf_log_item	*bip)
{
	xfs_buf_item_free_format(bip);
	kmem_free(bip->bli_item.li_lv_shadow);
	kmem_cache_free(xfs_buf_item_cache, bip);
}
/*
 * xfs_buf_item_relse() is called when the buf log item is no longer needed.
 * It detaches the item from the buffer, drops the buffer reference that was
 * taken via xfs_buf_hold() when the item was attached, and frees the item.
 * The item must already be off the AIL.
 */
void
xfs_buf_item_relse(
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	trace_xfs_buf_item_relse(bp, _RET_IP_);
	ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));

	/* Detach the item before releasing the buffer hold it pinned. */
	bp->b_log_item = NULL;
	xfs_buf_rele(bp);
	xfs_buf_item_free(bip);
}
/*
 * Completion processing for a logged buffer: remove the buf log item
 * from the AIL (if it is there) and then release it.
 */
void
xfs_buf_item_done(
	struct xfs_buf		*bp)
{
	/*
	 * If we are forcibly shutting down, this may well be off the AIL
	 * already. That's because we simulate the log-committed callbacks to
	 * unpin these buffers. Or we may never have put this item on AIL
	 * because the transaction was aborted forcibly.
	 * xfs_trans_ail_delete() takes care of these.
	 *
	 * Either way, AIL is useless if we're forcing a shutdown.
	 *
	 * Note that log recovery writes might have buffer items that are not on
	 * the AIL even when the file system is not shut down.
	 */
	xfs_trans_ail_delete(&bp->b_log_item->bli_item,
			     (bp->b_flags & _XBF_LOGRECOVERY) ? 0 :
			     SHUTDOWN_CORRUPT_INCORE);
	xfs_buf_item_relse(bp);
}