  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
  4. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
  5. */
  6. #include <linux/sched.h>
  7. #include <linux/slab.h>
  8. #include <linux/spinlock.h>
  9. #include <linux/completion.h>
  10. #include <linux/buffer_head.h>
  11. #include <linux/mm.h>
  12. #include <linux/pagemap.h>
  13. #include <linux/writeback.h>
  14. #include <linux/swap.h>
  15. #include <linux/delay.h>
  16. #include <linux/bio.h>
  17. #include <linux/gfs2_ondisk.h>
  18. #include "gfs2.h"
  19. #include "incore.h"
  20. #include "glock.h"
  21. #include "glops.h"
  22. #include "inode.h"
  23. #include "log.h"
  24. #include "lops.h"
  25. #include "meta_io.h"
  26. #include "rgrp.h"
  27. #include "trans.h"
  28. #include "util.h"
  29. #include "trace_gfs2.h"
/**
 * gfs2_aspace_writepage - write back a dirty metadata page
 * @page: the locked page whose buffers are to be written
 * @wbc: writeback control (sync mode, write flags)
 *
 * Walks the page's buffer ring twice: the first pass locks each mapped,
 * dirty buffer and marks it async-write; the second pass submits those
 * buffers with REQ_META | REQ_PRIO so the block layer prioritises the
 * metadata I/O.  If nothing was submitted, writeback is ended here.
 *
 * Returns: always 0; per-buffer I/O errors are reported through the
 * async-write end_io path, not through this return value.
 */
static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
{
	struct buffer_head *bh, *head;
	int nr_underway = 0;
	blk_opf_t write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc);

	BUG_ON(!PageLocked(page));
	BUG_ON(!page_has_buffers(page));

	head = page_buffers(page);
	bh = head;

	do {
		if (!buffer_mapped(bh))
			continue;
		/*
		 * If it's a fully non-blocking write attempt and we cannot
		 * lock the buffer then redirty the page. Note that this can
		 * potentially cause a busy-wait loop from flusher thread and kswapd
		 * activity, but those code paths have their own higher-level
		 * throttling.
		 */
		if (wbc->sync_mode != WB_SYNC_NONE) {
			lock_buffer(bh);
		} else if (!trylock_buffer(bh)) {
			redirty_page_for_writepage(wbc, page);
			continue;
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write(bh);
		} else {
			/* locked but already clean: nothing to submit */
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	/*
	 * The page and its buffers are protected by PageWriteback(), so we can
	 * drop the bh refcounts early.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	/* bh still points at head here: second pass submits the marked buffers */
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(REQ_OP_WRITE | write_flags, bh);
			nr_underway++;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);

	/* nothing submitted: end writeback ourselves since no end_io will */
	if (nr_underway == 0)
		end_page_writeback(page);

	return 0;
}
/* Address space operations for glock-backed metadata mappings */
const struct address_space_operations gfs2_meta_aops = {
	.dirty_folio	= block_dirty_folio,
	.invalidate_folio = block_invalidate_folio,
	.writepage	= gfs2_aspace_writepage,
	.release_folio	= gfs2_release_folio,
};
/* Address space operations for resource-group mappings (same ops as meta) */
const struct address_space_operations gfs2_rgrp_aops = {
	.dirty_folio	= block_dirty_folio,
	.invalidate_folio = block_invalidate_folio,
	.writepage	= gfs2_aspace_writepage,
	.release_folio	= gfs2_release_folio,
};
/**
 * gfs2_getbuf - Get a buffer with a given address space
 * @gl: the glock
 * @blkno: the block number (filesystem scope)
 * @create: 1 if the buffer should be created
 *
 * Looks up (or, if @create, allocates) the page covering @blkno in the
 * glock's address space, then returns the buffer_head for that block
 * with an extra reference held (caller must brelse()).
 *
 * Returns: the buffer, or NULL when @create is 0 and the page or its
 * buffers do not exist.
 */
struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
{
	struct address_space *mapping = gfs2_glock2aspace(gl);
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct page *page;
	struct buffer_head *bh;
	unsigned int shift;
	unsigned long index;
	unsigned int bufnum;

	/* glocks without their own aspace fall back to the superblock's */
	if (mapping == NULL)
		mapping = &sdp->sd_aspace;

	shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
	index = blkno >> shift; /* convert block to page */
	bufnum = blkno - (index << shift); /* block buf index within page */

	if (create) {
		/* retry until the page cache yields a locked page */
		for (;;) {
			page = grab_cache_page(mapping, index);
			if (page)
				break;
			yield();
		}
		if (!page_has_buffers(page))
			create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
	} else {
		page = find_get_page_flags(mapping, index,
					   FGP_LOCK|FGP_ACCESSED);
		if (!page)
			return NULL;
		if (!page_has_buffers(page)) {
			bh = NULL;
			goto out_unlock;
		}
	}

	/* Locate header for our buffer within our page */
	for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
		/* Do nothing */;
	get_bh(bh);

	if (!buffer_mapped(bh))
		map_bh(bh, sdp->sd_vfs, blkno);

out_unlock:
	/* page lock and page ref are dropped; bh ref (if any) is kept */
	unlock_page(page);
	put_page(page);

	return bh;
}
  144. static void meta_prep_new(struct buffer_head *bh)
  145. {
  146. struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
  147. lock_buffer(bh);
  148. clear_buffer_dirty(bh);
  149. set_buffer_uptodate(bh);
  150. unlock_buffer(bh);
  151. mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
  152. }
  153. /**
  154. * gfs2_meta_new - Get a block
  155. * @gl: The glock associated with this block
  156. * @blkno: The block number
  157. *
  158. * Returns: The buffer
  159. */
  160. struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
  161. {
  162. struct buffer_head *bh;
  163. bh = gfs2_getbuf(gl, blkno, CREATE);
  164. meta_prep_new(bh);
  165. return bh;
  166. }
/*
 * gfs2_meta_read_endio - bio completion handler for gfs2_submit_bhs()
 *
 * A bio built by gfs2_submit_bhs() may cover several buffer_heads per
 * segment.  For each segment, walk the page's buffer ring forward to the
 * first buffer the segment covers, then invoke each covered buffer's
 * b_end_io with the bio's overall status (non-zero bi_status == failure).
 */
static void gfs2_meta_read_endio(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		struct buffer_head *bh = page_buffers(page);
		unsigned int len = bvec->bv_len;

		/* skip buffers before this segment's offset in the page */
		while (bh_offset(bh) < bvec->bv_offset)
			bh = bh->b_this_page;
		do {
			struct buffer_head *next = bh->b_this_page;
			len -= bh->b_size;
			/* uptodate flag for b_end_io: success iff !bi_status */
			bh->b_end_io(bh, !bio->bi_status);
			bh = next;
		} while (bh && len);
	}
	bio_put(bio);
}
/*
 * Submit several consecutive buffer head I/O requests as a single bio I/O
 * request. (See submit_bh_wbc.)
 *
 * The buffers in @bhs must be disk-consecutive: the bio's start sector is
 * computed from the first buffer only.  When a bio fills up before all
 * @num buffers are added, a further bio is allocated for the remainder.
 */
static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num)
{
	while (num > 0) {
		struct buffer_head *bh = *bhs;
		struct bio *bio;

		bio = bio_alloc(bh->b_bdev, num, opf, GFP_NOIO);
		/* b_size >> 9 converts block size to 512-byte sectors */
		bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
		while (num > 0) {
			bh = *bhs;
			if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) {
				/* a bio must never be submitted empty */
				BUG_ON(bio->bi_iter.bi_size == 0);
				break;
			}
			bhs++;
			num--;
		}
		bio->bi_end_io = gfs2_meta_read_endio;
		submit_bio(bio);
	}
}
/**
 * gfs2_meta_read - Read a block from disk
 * @gl: The glock covering the block
 * @blkno: The block number
 * @flags: flags (DIO_WAIT causes the read to be waited upon)
 * @rahead: Do read-ahead
 * @bhp: the place where the buffer is returned (NULL on failure)
 *
 * Reads @blkno into the glock's address space, optionally starting
 * read-ahead on @blkno + 1.  Both reads are batched into one submission
 * via gfs2_submit_bhs().
 *
 * Returns: errno
 */
int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
		   int rahead, struct buffer_head **bhp)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct buffer_head *bh, *bhs[2];
	int num = 0;

	/* refuse I/O on a withdrawn fs (unless the withdraw itself needs it) */
	if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp)) {
		*bhp = NULL;
		return -EIO;
	}

	*bhp = bh = gfs2_getbuf(gl, blkno, CREATE);

	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		/* already cached: nothing to wait for below */
		flags &= ~DIO_WAIT;
	} else {
		bh->b_end_io = end_buffer_read_sync;
		get_bh(bh);
		bhs[num++] = bh;
	}

	if (rahead) {
		bh = gfs2_getbuf(gl, blkno + 1, CREATE);

		lock_buffer(bh);
		if (buffer_uptodate(bh)) {
			unlock_buffer(bh);
			brelse(bh);
		} else {
			bh->b_end_io = end_buffer_read_sync;
			/* read-ahead bh: our ref from gfs2_getbuf is consumed
			   by the read completion, so no extra get_bh() here */
			bhs[num++] = bh;
		}
	}

	gfs2_submit_bhs(REQ_OP_READ | REQ_META | REQ_PRIO, bhs, num);
	if (!(flags & DIO_WAIT))
		return 0;

	bh = *bhp;
	wait_on_buffer(bh);
	if (unlikely(!buffer_uptodate(bh))) {
		struct gfs2_trans *tr = current->journal_info;
		/* only withdraw if a dirty transaction is in progress */
		if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
			gfs2_io_error_bh_wd(sdp, bh);
		brelse(bh);
		*bhp = NULL;
		return -EIO;
	}

	return 0;
}
  266. /**
  267. * gfs2_meta_wait - Reread a block from disk
  268. * @sdp: the filesystem
  269. * @bh: The block to wait for
  270. *
  271. * Returns: errno
  272. */
  273. int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
  274. {
  275. if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp))
  276. return -EIO;
  277. wait_on_buffer(bh);
  278. if (!buffer_uptodate(bh)) {
  279. struct gfs2_trans *tr = current->journal_info;
  280. if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
  281. gfs2_io_error_bh_wd(sdp, bh);
  282. return -EIO;
  283. }
  284. if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp))
  285. return -EIO;
  286. return 0;
  287. }
/*
 * gfs2_remove_from_journal - detach a buffer from journaling state
 * @bh: the buffer
 * @meta: REMOVE_META or REMOVE_JDATA, selects which trans counter to bump
 *
 * Unpins the buffer if pinned (updating the current transaction's removal
 * counters), then disposes of its bufdata: issue a revoke if it belongs
 * to a transaction, free it if it was pinned, or pull it off the AIL
 * lists.  Finally the buffer is marked clean and not uptodate.
 *
 * NOTE(review): the pinned path dereferences tr without a NULL check —
 * presumably a pinned buffer implies an active transaction; confirm
 * against callers.
 */
void gfs2_remove_from_journal(struct buffer_head *bh, int meta)
{
	struct address_space *mapping = bh->b_page->mapping;
	struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
	struct gfs2_bufdata *bd = bh->b_private;
	struct gfs2_trans *tr = current->journal_info;
	int was_pinned = 0;

	if (test_clear_buffer_pinned(bh)) {
		trace_gfs2_pin(bd, 0);
		atomic_dec(&sdp->sd_log_pinned);
		list_del_init(&bd->bd_list);
		if (meta == REMOVE_META)
			tr->tr_num_buf_rm++;
		else
			tr->tr_num_databuf_rm++;
		set_bit(TR_TOUCHED, &tr->tr_flags);
		was_pinned = 1;
		/* drop the ref the pin was holding */
		brelse(bh);
	}
	if (bd) {
		if (bd->bd_tr) {
			gfs2_trans_add_revoke(sdp, bd);
		} else if (was_pinned) {
			bh->b_private = NULL;
			kmem_cache_free(gfs2_bufdata_cachep, bd);
		} else if (!list_empty(&bd->bd_ail_st_list) &&
					!list_empty(&bd->bd_ail_gl_list)) {
			gfs2_remove_from_ail(bd);
		}
	}
	clear_buffer_dirty(bh);
	clear_buffer_uptodate(bh);
}
  321. /**
  322. * gfs2_ail1_wipe - remove deleted/freed buffers from the ail1 list
  323. * @sdp: superblock
  324. * @bstart: starting block address of buffers to remove
  325. * @blen: length of buffers to be removed
  326. *
  327. * This function is called from gfs2_journal wipe, whose job is to remove
  328. * buffers, corresponding to deleted blocks, from the journal. If we find any
  329. * bufdata elements on the system ail1 list, they haven't been written to
  330. * the journal yet. So we remove them.
  331. */
  332. static void gfs2_ail1_wipe(struct gfs2_sbd *sdp, u64 bstart, u32 blen)
  333. {
  334. struct gfs2_trans *tr, *s;
  335. struct gfs2_bufdata *bd, *bs;
  336. struct buffer_head *bh;
  337. u64 end = bstart + blen;
  338. gfs2_log_lock(sdp);
  339. spin_lock(&sdp->sd_ail_lock);
  340. list_for_each_entry_safe(tr, s, &sdp->sd_ail1_list, tr_list) {
  341. list_for_each_entry_safe(bd, bs, &tr->tr_ail1_list,
  342. bd_ail_st_list) {
  343. bh = bd->bd_bh;
  344. if (bh->b_blocknr < bstart || bh->b_blocknr >= end)
  345. continue;
  346. gfs2_remove_from_journal(bh, REMOVE_JDATA);
  347. }
  348. }
  349. spin_unlock(&sdp->sd_ail_lock);
  350. gfs2_log_unlock(sdp);
  351. }
  352. static struct buffer_head *gfs2_getjdatabuf(struct gfs2_inode *ip, u64 blkno)
  353. {
  354. struct address_space *mapping = ip->i_inode.i_mapping;
  355. struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
  356. struct page *page;
  357. struct buffer_head *bh;
  358. unsigned int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
  359. unsigned long index = blkno >> shift; /* convert block to page */
  360. unsigned int bufnum = blkno - (index << shift);
  361. page = find_get_page_flags(mapping, index, FGP_LOCK|FGP_ACCESSED);
  362. if (!page)
  363. return NULL;
  364. if (!page_has_buffers(page)) {
  365. unlock_page(page);
  366. put_page(page);
  367. return NULL;
  368. }
  369. /* Locate header for our buffer within our page */
  370. for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
  371. /* Do nothing */;
  372. get_bh(bh);
  373. unlock_page(page);
  374. put_page(page);
  375. return bh;
  376. }
  377. /**
  378. * gfs2_journal_wipe - make inode's buffers so they aren't dirty/pinned anymore
  379. * @ip: the inode who owns the buffers
  380. * @bstart: the first buffer in the run
  381. * @blen: the number of buffers in the run
  382. *
  383. */
  384. void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
  385. {
  386. struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
  387. struct buffer_head *bh;
  388. int ty;
  389. gfs2_ail1_wipe(sdp, bstart, blen);
  390. while (blen) {
  391. ty = REMOVE_META;
  392. bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE);
  393. if (!bh && gfs2_is_jdata(ip)) {
  394. bh = gfs2_getjdatabuf(ip, bstart);
  395. ty = REMOVE_JDATA;
  396. }
  397. if (bh) {
  398. lock_buffer(bh);
  399. gfs2_log_lock(sdp);
  400. spin_lock(&sdp->sd_ail_lock);
  401. gfs2_remove_from_journal(bh, ty);
  402. spin_unlock(&sdp->sd_ail_lock);
  403. gfs2_log_unlock(sdp);
  404. unlock_buffer(bh);
  405. brelse(bh);
  406. }
  407. bstart++;
  408. blen--;
  409. }
  410. }
  411. /**
  412. * gfs2_meta_buffer - Get a metadata buffer
  413. * @ip: The GFS2 inode
  414. * @mtype: The block type (GFS2_METATYPE_*)
  415. * @num: The block number (device relative) of the buffer
  416. * @bhp: the buffer is returned here
  417. *
  418. * Returns: errno
  419. */
  420. int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num,
  421. struct buffer_head **bhp)
  422. {
  423. struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
  424. struct gfs2_glock *gl = ip->i_gl;
  425. struct buffer_head *bh;
  426. int ret = 0;
  427. int rahead = 0;
  428. if (num == ip->i_no_addr)
  429. rahead = ip->i_rahead;
  430. ret = gfs2_meta_read(gl, num, DIO_WAIT, rahead, &bh);
  431. if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
  432. brelse(bh);
  433. ret = -EIO;
  434. } else {
  435. *bhp = bh;
  436. }
  437. return ret;
  438. }
  439. /**
  440. * gfs2_meta_ra - start readahead on an extent of a file
  441. * @gl: the glock the blocks belong to
  442. * @dblock: the starting disk block
  443. * @extlen: the number of blocks in the extent
  444. *
  445. * returns: the first buffer in the extent
  446. */
  447. struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
  448. {
  449. struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
  450. struct buffer_head *first_bh, *bh;
  451. u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
  452. sdp->sd_sb.sb_bsize_shift;
  453. BUG_ON(!extlen);
  454. if (max_ra < 1)
  455. max_ra = 1;
  456. if (extlen > max_ra)
  457. extlen = max_ra;
  458. first_bh = gfs2_getbuf(gl, dblock, CREATE);
  459. if (buffer_uptodate(first_bh))
  460. goto out;
  461. bh_read_nowait(first_bh, REQ_META | REQ_PRIO);
  462. dblock++;
  463. extlen--;
  464. while (extlen) {
  465. bh = gfs2_getbuf(gl, dblock, CREATE);
  466. bh_readahead(bh, REQ_RAHEAD | REQ_META | REQ_PRIO);
  467. brelse(bh);
  468. dblock++;
  469. extlen--;
  470. if (!buffer_locked(first_bh) && buffer_uptodate(first_bh))
  471. goto out;
  472. }
  473. wait_on_buffer(first_bh);
  474. out:
  475. return first_bh;
  476. }