
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/list_sort.h>
#include <linux/blkdev.h>

#include "bmap.h"
#include "dir.h"
#include "gfs2.h"
#include "incore.h"
#include "inode.h"
#include "glock.h"
#include "glops.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"

/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: The superblock
 * @bh: The buffer to be pinned
 *
 * The log lock must be held when calling this function
 */
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	BUG_ON(!current->journal_info);

	clear_buffer_dirty(bh);
	if (test_set_buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);
	if (!buffer_uptodate(bh))
		gfs2_io_error_bh_wd(sdp, bh);
	bd = bh->b_private;
	/* If this buffer is in the AIL and it has already been written
	 * to in-place disk block, remove it from the AIL.
	 */
	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_tr)
		list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list);
	spin_unlock(&sdp->sd_ail_lock);
	get_bh(bh);
	atomic_inc(&sdp->sd_log_pinned);
	trace_gfs2_pin(bd, 1);
}

static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
{
	return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
}

static void maybe_release_space(struct gfs2_bufdata *bd)
{
	struct gfs2_glock *gl = bd->bd_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
	unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
	struct gfs2_bitmap *bi = rgd->rd_bits + index;

	rgrp_lock_local(rgd);
	if (bi->bi_clone == NULL)
		goto out;
	if (sdp->sd_args.ar_discard)
		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
	memcpy(bi->bi_clone + bi->bi_offset,
	       bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
	clear_bit(GBF_FULL, &bi->bi_flags);
	rgd->rd_free_clone = rgd->rd_free;
	BUG_ON(rgd->rd_free_clone < rgd->rd_reserved);
	rgd->rd_extfail_pt = rgd->rd_free;

out:
	rgrp_unlock_local(rgd);
}

/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @tr: The system transaction being flushed
 */
static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       struct gfs2_trans *tr)
{
	struct gfs2_bufdata *bd = bh->b_private;

	BUG_ON(!buffer_uptodate(bh));
	BUG_ON(!buffer_pinned(bh));

	lock_buffer(bh);
	mark_buffer_dirty(bh);
	clear_buffer_pinned(bh);

	if (buffer_is_rgrp(bd))
		maybe_release_space(bd);

	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_tr) {
		list_del(&bd->bd_ail_st_list);
		brelse(bh);
	} else {
		struct gfs2_glock *gl = bd->bd_gl;
		list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
		atomic_inc(&gl->gl_ail_count);
	}
	bd->bd_tr = tr;
	list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);

	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	trace_gfs2_pin(bd, 0);
	unlock_buffer(bh);
	atomic_dec(&sdp->sd_log_pinned);
}
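/*
 * Lifecycle sketch (summarizing the two functions above): gfs2_pin()
 * is called under the log lock when a transaction takes ownership of a
 * buffer; it clears the dirty bit so normal writeback leaves the
 * buffer alone, takes an extra reference, and bumps sd_log_pinned.
 * After the log flush has written the buffer's content to the journal,
 * gfs2_unpin() re-dirties it and places it on the transaction's AIL1
 * list, from where it is later written back to its in-place location.
 */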
void gfs2_log_incr_head(struct gfs2_sbd *sdp)
{
	BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
	       (sdp->sd_log_flush_head != sdp->sd_log_head));

	if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks)
		sdp->sd_log_flush_head = 0;
}
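/*
 * Example (illustrative values): the journal is circular, so the flush
 * head wraps to block 0 once it reaches jd_blocks. With an 8192-block
 * journal, successive calls advance the head 8190 -> 8191 -> 0 -> 1.
 * The BUG_ON catches the flush head catching up to the tail, which
 * would mean overwriting journal data not yet written back in place.
 */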
u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lblock)
{
	struct gfs2_journal_extent *je;

	list_for_each_entry(je, &jd->extent_list, list) {
		if (lblock >= je->lblock && lblock < je->lblock + je->blocks)
			return je->dblock + lblock - je->lblock;
	}
	return -1;
}
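/*
 * Worked example (hypothetical extent map): given a single extent
 * { lblock = 0, dblock = 1000, blocks = 8192 }, gfs2_log_bmap(jd, 42)
 * returns 1000 + 42 - 0 = 1042. A logical block outside every extent
 * yields -1 (an all-ones u64), which callers treat as unmapped.
 */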
/**
 * gfs2_end_log_write_bh - end log write of pagecache data with buffers
 * @sdp: The superblock
 * @bvec: The bio_vec
 * @error: The i/o status
 *
 * This finds the relevant buffers and unlocks them and sets the
 * error flag according to the status of the i/o request. This is
 * used when the log is writing data which has an in-place version
 * that is pinned in the pagecache.
 */
static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp,
				  struct bio_vec *bvec,
				  blk_status_t error)
{
	struct buffer_head *bh, *next;
	struct page *page = bvec->bv_page;
	unsigned size;

	bh = page_buffers(page);
	size = bvec->bv_len;
	while (bh_offset(bh) < bvec->bv_offset)
		bh = bh->b_this_page;
	do {
		if (error)
			mark_buffer_write_io_error(bh);
		unlock_buffer(bh);
		next = bh->b_this_page;
		size -= bh->b_size;
		brelse(bh);
		bh = next;
	} while(bh && size);
}

/**
 * gfs2_end_log_write - end of i/o to the log
 * @bio: The bio
 *
 * Each bio_vec contains either data from the pagecache or data
 * relating to the log itself. Here we iterate over the bio_vec
 * array, processing both kinds of data.
 *
 */
static void gfs2_end_log_write(struct bio *bio)
{
	struct gfs2_sbd *sdp = bio->bi_private;
	struct bio_vec *bvec;
	struct page *page;
	struct bvec_iter_all iter_all;

	if (bio->bi_status) {
		if (!cmpxchg(&sdp->sd_log_error, 0, (int)bio->bi_status))
			fs_err(sdp, "Error %d writing to journal, jid=%u\n",
			       bio->bi_status, sdp->sd_jdesc->jd_jid);
		gfs2_withdraw_delayed(sdp);
		/* prevent more writes to the journal */
		clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
		wake_up(&sdp->sd_logd_waitq);
	}

	bio_for_each_segment_all(bvec, bio, iter_all) {
		page = bvec->bv_page;
		if (page_has_buffers(page))
			gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
		else
			mempool_free(page, gfs2_page_pool);
	}

	bio_put(bio);
	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
		wake_up(&sdp->sd_log_flush_wait);
}

/**
 * gfs2_log_submit_bio - Submit any pending log bio
 * @biop: Address of the bio pointer
 * @opf: REQ_OP | op_flags
 *
 * Submit any pending part-built or full bio to the block device. If
 * there is no pending bio, then this is a no-op.
 */
void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf)
{
	struct bio *bio = *biop;
	if (bio) {
		struct gfs2_sbd *sdp = bio->bi_private;
		atomic_inc(&sdp->sd_log_in_flight);
		bio->bi_opf = opf;
		submit_bio(bio);
		*biop = NULL;
	}
}

/**
 * gfs2_log_alloc_bio - Allocate a bio
 * @sdp: The super block
 * @blkno: The device block number we want to write to
 * @end_io: The bi_end_io callback
 *
 * Allocate a new bio, initialize it with the given parameters and return it.
 *
 * Returns: The newly allocated bio
 */
static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
				      bio_end_io_t *end_io)
{
	struct super_block *sb = sdp->sd_vfs;
	struct bio *bio = bio_alloc(sb->s_bdev, BIO_MAX_VECS, 0, GFP_NOIO);

	bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift;
	bio->bi_end_io = end_io;
	bio->bi_private = sdp;

	return bio;
}

/**
 * gfs2_log_get_bio - Get cached log bio, or allocate a new one
 * @sdp: The super block
 * @blkno: The device block number we want to write to
 * @biop: The bio to get or allocate
 * @op: REQ_OP
 * @end_io: The bi_end_io callback
 * @flush: Always flush the current bio and allocate a new one?
 *
 * If there is a cached bio, then if the next block number is sequential
 * with the previous one, return it, otherwise flush the bio to the
 * device. If there is no cached bio, or we just flushed it, then
 * allocate a new one.
 *
 * Returns: The bio to use for log writes
 */
static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
				    struct bio **biop, enum req_op op,
				    bio_end_io_t *end_io, bool flush)
{
	struct bio *bio = *biop;

	if (bio) {
		u64 nblk;

		nblk = bio_end_sector(bio);
		nblk >>= sdp->sd_fsb2bb_shift;
		if (blkno == nblk && !flush)
			return bio;
		gfs2_log_submit_bio(biop, op);
	}

	*biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
	return *biop;
}
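/*
 * Illustration (assuming 4 KiB filesystem blocks on a 512-byte-sector
 * device, so sd_fsb2bb_shift == 3): if the cached bio currently ends
 * at sector 8192, then bio_end_sector() >> 3 == 1024, and a write to
 * fs block 1024 is sequential and reuses the bio. A write to any other
 * block (or flush == true) submits the cached bio and starts a new one.
 */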
/**
 * gfs2_log_write - write to log
 * @sdp: the filesystem
 * @jd: The journal descriptor
 * @page: the page to write
 * @size: the size of the data to write
 * @offset: the offset within the page
 * @blkno: block number of the log entry
 *
 * Try and add the page segment to the current bio. If that fails,
 * submit the current bio to the device and create a new one, and
 * then add the page segment to that.
 */
void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
		    struct page *page, unsigned size, unsigned offset,
		    u64 blkno)
{
	struct bio *bio;
	int ret;

	bio = gfs2_log_get_bio(sdp, blkno, &jd->jd_log_bio, REQ_OP_WRITE,
			       gfs2_end_log_write, false);
	ret = bio_add_page(bio, page, size, offset);
	if (ret == 0) {
		bio = gfs2_log_get_bio(sdp, blkno, &jd->jd_log_bio,
				       REQ_OP_WRITE, gfs2_end_log_write, true);
		ret = bio_add_page(bio, page, size, offset);
		WARN_ON(ret == 0);
	}
}
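/*
 * Note on the retry above: bio_add_page() returns 0 when the bio is
 * full. In that case gfs2_log_get_bio() is called again with
 * flush == true, which submits the full bio and allocates an empty
 * one; adding a single page segment to a fresh BIO_MAX_VECS-sized bio
 * is then expected to succeed, hence the WARN_ON rather than a retry
 * loop.
 */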
/**
 * gfs2_log_write_bh - write a buffer's content to the log
 * @sdp: The super block
 * @bh: The buffer pointing to the in-place location
 *
 * This writes the content of the buffer to the next available location
 * in the log. The buffer will be unlocked once the i/o to the log has
 * completed.
 */
static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	u64 dblock;

	dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head);
	gfs2_log_incr_head(sdp);
	gfs2_log_write(sdp, sdp->sd_jdesc, bh->b_page, bh->b_size,
		       bh_offset(bh), dblock);
}

/**
 * gfs2_log_write_page - write one block stored in a page, into the log
 * @sdp: The superblock
 * @page: The struct page
 *
 * This writes the first block-sized part of the page into the log. Note
 * that the page must have been allocated from the gfs2_page_pool mempool
 * and that after this has been called, ownership has been transferred and
 * the page may be freed at any time.
 */
static void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
{
	struct super_block *sb = sdp->sd_vfs;
	u64 dblock;

	dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head);
	gfs2_log_incr_head(sdp);
	gfs2_log_write(sdp, sdp->sd_jdesc, page, sb->s_blocksize, 0, dblock);
}

/**
 * gfs2_end_log_read - end I/O callback for reads from the log
 * @bio: The bio
 *
 * Simply unlock the pages in the bio. The main thread will wait on them and
 * process them in order as necessary.
 */
static void gfs2_end_log_read(struct bio *bio)
{
	struct page *page;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		page = bvec->bv_page;
		if (bio->bi_status) {
			int err = blk_status_to_errno(bio->bi_status);

			SetPageError(page);
			mapping_set_error(page->mapping, err);
		}
		unlock_page(page);
	}

	bio_put(bio);
}

/**
 * gfs2_jhead_pg_srch - Look for the journal head in a given page.
 * @jd: The journal descriptor
 * @head: The journal head to start from
 * @page: The page to look in
 *
 * Returns: 1 if found, 0 otherwise.
 */
static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
			       struct gfs2_log_header_host *head,
			       struct page *page)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_log_header_host lh;
	void *kaddr = kmap_atomic(page);
	unsigned int offset;
	bool ret = false;

	for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
		if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
			if (lh.lh_sequence >= head->lh_sequence)
				*head = lh;
			else {
				ret = true;
				break;
			}
		}
	}
	kunmap_atomic(kaddr);
	return ret;
}
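/*
 * Example (hypothetical sequence numbers): log headers are laid out in
 * increasing sequence order around the circular journal until the head,
 * after which the numbers wrap back to older values. Scanning headers
 * with sequences 17, 18, 19, 12 updates *head through 19, then hits
 * 12 < 19 and returns true: the header with sequence 19 is the journal
 * head.
 */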
/**
 * gfs2_jhead_process_page - Search/cleanup a page
 * @jd: The journal descriptor
 * @index: Index of the page to look into
 * @head: The journal head to start from
 * @done: If set, perform only cleanup, else search and set if found.
 *
 * Find the folio with 'index' in the journal's mapping. Search the folio for
 * the journal head if requested (cleanup == false). Release refs on the
 * folio so the page cache can reclaim it. We grabbed a
 * reference on this folio twice, first when we did a find_or_create_page()
 * to obtain the folio to add it to the bio and second when we do a
 * filemap_get_folio() here to get the folio to wait on while I/O on it is
 * being completed.
 * This function is also used to free up a folio we might've grabbed but not
 * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
 * submitted the I/O, but we already found the jhead so we only need to drop
 * our references to the folio.
 */
static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
				    struct gfs2_log_header_host *head,
				    bool *done)
{
	struct folio *folio;

	folio = filemap_get_folio(jd->jd_inode->i_mapping, index);

	folio_wait_locked(folio);
	if (folio_test_error(folio))
		*done = true;

	if (!*done)
		*done = gfs2_jhead_pg_srch(jd, head, &folio->page);

	/* filemap_get_folio() and the earlier find_or_create_page() */
	folio_put_refs(folio, 2);
}

static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs)
{
	struct bio *new;

	new = bio_alloc(prev->bi_bdev, nr_iovecs, prev->bi_opf, GFP_NOIO);
	bio_clone_blkg_association(new, prev);
	new->bi_iter.bi_sector = bio_end_sector(prev);
	bio_chain(new, prev);
	submit_bio(prev);
	return new;
}

/**
 * gfs2_find_jhead - find the head of a log
 * @jd: The journal descriptor
 * @head: The log descriptor for the head of the log is returned here
 * @keep_cache: If set inode pages will not be truncated
 *
 * Do a search of a journal by reading it in large chunks using bios and find
 * the valid log entry with the highest sequence number. (i.e. the log head)
 *
 * Returns: 0 on success, errno otherwise
 */
int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
		    bool keep_cache)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct address_space *mapping = jd->jd_inode->i_mapping;
	unsigned int block = 0, blocks_submitted = 0, blocks_read = 0;
	unsigned int bsize = sdp->sd_sb.sb_bsize, off;
	unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
	unsigned int shift = PAGE_SHIFT - bsize_shift;
	unsigned int max_blocks = 2 * 1024 * 1024 >> bsize_shift;
	struct gfs2_journal_extent *je;
	int sz, ret = 0;
	struct bio *bio = NULL;
	struct page *page = NULL;
	bool done = false;
	errseq_t since;

	memset(head, 0, sizeof(*head));
	if (list_empty(&jd->extent_list))
		gfs2_map_journal_extents(sdp, jd);

	since = filemap_sample_wb_err(mapping);
	list_for_each_entry(je, &jd->extent_list, list) {
		u64 dblock = je->dblock;

		for (; block < je->lblock + je->blocks; block++, dblock++) {
			if (!page) {
				page = find_or_create_page(mapping,
						block >> shift, GFP_NOFS);
				if (!page) {
					ret = -ENOMEM;
					done = true;
					goto out;
				}
				off = 0;
			}

			if (bio && (off || block < blocks_submitted + max_blocks)) {
				sector_t sector = dblock << sdp->sd_fsb2bb_shift;

				if (bio_end_sector(bio) == sector) {
					sz = bio_add_page(bio, page, bsize, off);
					if (sz == bsize)
						goto block_added;
				}
				if (off) {
					unsigned int blocks =
						(PAGE_SIZE - off) >> bsize_shift;

					bio = gfs2_chain_bio(bio, blocks);
					goto add_block_to_new_bio;
				}
			}

			if (bio) {
				blocks_submitted = block;
				submit_bio(bio);
			}

			bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read);
			bio->bi_opf = REQ_OP_READ;
add_block_to_new_bio:
			sz = bio_add_page(bio, page, bsize, off);
			BUG_ON(sz != bsize);
block_added:
			off += bsize;
			if (off == PAGE_SIZE)
				page = NULL;
			if (blocks_submitted <= blocks_read + max_blocks) {
				/* Keep at least one bio in flight */
				continue;
			}

			gfs2_jhead_process_page(jd, blocks_read >> shift, head, &done);
			blocks_read += PAGE_SIZE >> bsize_shift;
			if (done)
				goto out;	/* found */
		}
	}

out:
	if (bio)
		submit_bio(bio);
	while (blocks_read < block) {
		gfs2_jhead_process_page(jd, blocks_read >> shift, head, &done);
		blocks_read += PAGE_SIZE >> bsize_shift;
	}

	if (!ret)
		ret = filemap_check_wb_err(mapping, since);

	if (!keep_cache)
		truncate_inode_pages(mapping, 0);

	return ret;
}
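/*
 * Illustration of the read-ahead window above (assuming 4 KiB blocks,
 * so bsize_shift == 12): max_blocks = 2 MiB >> 12 = 512 blocks. The
 * loop keeps submitting reads until it is roughly 2 MiB ahead of the
 * blocks it has processed, then starts consuming completed pages with
 * gfs2_jhead_process_page() while further reads remain in flight.
 */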
static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
				      u32 ld_length, u32 ld_data1)
{
	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
	struct gfs2_log_descriptor *ld = page_address(page);

	clear_page(ld);
	ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	ld->ld_type = cpu_to_be32(ld_type);
	ld->ld_length = cpu_to_be32(ld_length);
	ld->ld_data1 = cpu_to_be32(ld_data1);
	ld->ld_data2 = 0;
	return page;
}

static void gfs2_check_magic(struct buffer_head *bh)
{
	void *kaddr;
	__be32 *ptr;

	clear_buffer_escaped(bh);
	kaddr = kmap_atomic(bh->b_page);
	ptr = kaddr + bh_offset(bh);
	if (*ptr == cpu_to_be32(GFS2_MAGIC))
		set_buffer_escaped(bh);
	kunmap_atomic(kaddr);
}
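/*
 * Why "escaping" exists: a journaled data block that happens to begin
 * with GFS2_MAGIC could be mistaken for a metadata block during log
 * replay. Such a block is flagged here; gfs2_before_commit() then
 * writes a copy with the first 32 bits zeroed and records an escape
 * flag in the log descriptor, and databuf_lo_scan_elements() restores
 * GFS2_MAGIC when it replays the block.
 */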
static int blocknr_cmp(void *priv, const struct list_head *a,
		       const struct list_head *b)
{
	struct gfs2_bufdata *bda, *bdb;

	bda = list_entry(a, struct gfs2_bufdata, bd_list);
	bdb = list_entry(b, struct gfs2_bufdata, bd_list);

	if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
		return -1;
	if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
		return 1;
	return 0;
}

static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
			       unsigned int total, struct list_head *blist,
			       bool is_databuf)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
	struct page *page;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

	gfs2_log_lock(sdp);
	list_sort(NULL, blist, blocknr_cmp);
	bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
	while(total) {
		num = total;
		if (total > limit)
			num = limit;
		gfs2_log_unlock(sdp);
		page = gfs2_get_log_desc(sdp,
					 is_databuf ? GFS2_LOG_DESC_JDATA :
					 GFS2_LOG_DESC_METADATA, num + 1, num);
		ld = page_address(page);
		gfs2_log_lock(sdp);
		ptr = (__be64 *)(ld + 1);

		n = 0;
		list_for_each_entry_continue(bd1, blist, bd_list) {
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (is_databuf) {
				gfs2_check_magic(bd1->bd_bh);
				*ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
			}
			if (++n >= num)
				break;
		}

		gfs2_log_unlock(sdp);
		gfs2_log_write_page(sdp, page);
		gfs2_log_lock(sdp);

		n = 0;
		list_for_each_entry_continue(bd2, blist, bd_list) {
			get_bh(bd2->bd_bh);
			gfs2_log_unlock(sdp);
			lock_buffer(bd2->bd_bh);
			if (buffer_escaped(bd2->bd_bh)) {
				void *kaddr;
				page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
				ptr = page_address(page);
				kaddr = kmap_atomic(bd2->bd_bh->b_page);
				memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
				       bd2->bd_bh->b_size);
				kunmap_atomic(kaddr);
				*(__be32 *)ptr = 0;
				clear_buffer_escaped(bd2->bd_bh);
				unlock_buffer(bd2->bd_bh);
				brelse(bd2->bd_bh);
				gfs2_log_write_page(sdp, page);
			} else {
				gfs2_log_write_bh(sdp, bd2->bd_bh);
			}
			gfs2_log_lock(sdp);
			if (++n >= num)
				break;
		}
		BUG_ON(total < num);
		total -= num;
	}
	gfs2_log_unlock(sdp);
}
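/*
 * On-disk layout written by gfs2_before_commit(), per chunk of up to
 * 'limit' buffers: one descriptor block (GFS2_LOG_DESC_METADATA or
 * GFS2_LOG_DESC_JDATA, ld_length = num + 1, ld_data1 = num) followed
 * by the num buffer contents. The descriptor's trailing space holds
 * one be64 block number per metadata buffer, or two be64 words (block
 * number, escaped flag) per journaled data buffer, which is why
 * databuf_limit() is roughly half of buf_limit().
 */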
static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
	unsigned int nbuf;
	if (tr == NULL)
		return;
	nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
	gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0);
}

static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head;
	struct gfs2_bufdata *bd;

	if (tr == NULL)
		return;

	head = &tr->tr_buf;
	while (!list_empty(head)) {
		bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gfs2_unpin(sdp, bd->bd_bh, tr);
	}
}

static void buf_lo_before_scan(struct gfs2_jdesc *jd,
			       struct gfs2_log_header_host *head, int pass)
{
	if (pass != 0)
		return;

	jd->jd_found_blocks = 0;
	jd->jd_replayed_blocks = 0;
}

#define obsolete_rgrp_replay \
"Replaying 0x%llx from jid=%d/0x%llx but we already have a bh!\n"
#define obsolete_rgrp_replay2 \
"busy:%d, pinned:%d rg_gen:0x%llx, j_gen:0x%llx\n"

static void obsolete_rgrp(struct gfs2_jdesc *jd, struct buffer_head *bh_log,
			  u64 blkno)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_rgrp *jrgd = (struct gfs2_rgrp *)bh_log->b_data;

	rgd = gfs2_blk2rgrpd(sdp, blkno, false);
	if (rgd && rgd->rd_addr == blkno &&
	    rgd->rd_bits && rgd->rd_bits->bi_bh) {
		fs_info(sdp, obsolete_rgrp_replay, (unsigned long long)blkno,
			jd->jd_jid, bh_log->b_blocknr);
		fs_info(sdp, obsolete_rgrp_replay2,
			buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0,
			buffer_pinned(rgd->rd_bits->bi_bh),
			rgd->rd_igeneration,
			be64_to_cpu(jrgd->rg_igeneration));
		gfs2_dump_glock(NULL, rgd->rd_gl, true);
	}
}

static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	gfs2_replay_incr_blk(jd, &start);

	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);

		jd->jd_found_blocks++;

		if (gfs2_revoke_check(jd, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		if (gfs2_meta_check(sdp, bh_ip))
			error = -EIO;
		else {
			struct gfs2_meta_header *mh =
				(struct gfs2_meta_header *)bh_ip->b_data;

			if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG))
				obsolete_rgrp(jd, bh_log, blkno);

			mark_buffer_dirty(bh_ip);
		}
		brelse(bh_log);
		brelse(bh_ip);

		if (error)
			break;

		jd->jd_replayed_blocks++;
	}

	return error;
}

static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_inode_metasync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	gfs2_inode_metasync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
		jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
}

static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct gfs2_meta_header *mh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_revokes;
	struct gfs2_bufdata *bd;
	struct page *page;
	unsigned int length;

	gfs2_flush_revokes(sdp);
	if (!sdp->sd_log_num_revoke)
		return;

	length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke);
	page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
	offset = sizeof(struct gfs2_log_descriptor);

	list_for_each_entry(bd, head, bd_list) {
		sdp->sd_log_num_revoke--;

		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
			gfs2_log_write_page(sdp, page);
			page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
			mh = page_address(page);
			clear_page(mh);
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
			offset = sizeof(struct gfs2_meta_header);
		}

		*(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
		offset += sizeof(u64);
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

	gfs2_log_write_page(sdp, page);
}
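/*
 * Packing illustration: revokes are be64 block numbers written back to
 * back. The first log block starts after the gfs2_log_descriptor
 * header, and each continuation block starts after a bare
 * gfs2_meta_header (GFS2_METATYPE_LB). With 4 KiB blocks that gives
 * (4096 - sizeof(struct gfs2_log_descriptor)) / 8 entries in the first
 * block and (4096 - sizeof(struct gfs2_meta_header)) / 8 in each
 * continuation block; gfs2_struct2blk() sizes the run accordingly.
 */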
void gfs2_drain_revokes(struct gfs2_sbd *sdp)
{
	struct list_head *head = &sdp->sd_log_revokes;
	struct gfs2_bufdata *bd;
	struct gfs2_glock *gl;

	while (!list_empty(head)) {
		bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gl = bd->bd_gl;
		gfs2_glock_remove_revoke(gl);
		kmem_cache_free(gfs2_bufdata_cachep, bd);
	}
}

static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	gfs2_drain_revokes(sdp);
}

static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
				  struct gfs2_log_header_host *head, int pass)
{
	if (pass != 0)
		return;

	jd->jd_found_revokes = 0;
	jd->jd_replay_tail = head->lh_tail;
}

static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
				   struct gfs2_log_descriptor *ld, __be64 *ptr,
				   int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	unsigned int blks = be32_to_cpu(ld->ld_length);
	unsigned int revokes = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh;
	unsigned int offset;
	u64 blkno;
	int first = 1;
	int error;

	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
		return 0;

	offset = sizeof(struct gfs2_log_descriptor);

	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		error = gfs2_replay_read_block(jd, start, &bh);
		if (error)
			return error;

		if (!first)
			gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);

		while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));

			error = gfs2_revoke_add(jd, blkno, start);
			if (error < 0) {
				brelse(bh);
				return error;
			}
			else if (error)
				jd->jd_found_revokes++;

			if (!--revokes)
				break;

			offset += sizeof(u64);
		}

		brelse(bh);
		offset = sizeof(struct gfs2_meta_header);
		first = 0;
	}

	return 0;
}

static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_revoke_clean(jd);
		return;
	}
	if (pass != 1)
		return;

	fs_info(sdp, "jid=%u: Found %u revoke tags\n",
		jd->jd_jid, jd->jd_found_revokes);

	gfs2_revoke_clean(jd);
}

/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 * @sdp: The filesystem
 * @tr: The system transaction being flushed
 */
static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int limit = databuf_limit(sdp);
	unsigned int nbuf;
	if (tr == NULL)
		return;
	nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
	gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1);
}

static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	u64 esc;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	gfs2_replay_incr_blk(jd, &start);
	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);
		esc = be64_to_cpu(*ptr++);

		jd->jd_found_blocks++;

		if (gfs2_revoke_check(jd, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		/* Unescape */
		if (esc) {
			__be32 *eptr = (__be32 *)bh_ip->b_data;
			*eptr = cpu_to_be32(GFS2_MAGIC);
		}
		mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		jd->jd_replayed_blocks++;
	}

	return error;
}
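/*
 * Replay example (hypothetical descriptor): a GFS2_LOG_DESC_JDATA
 * descriptor with ld_data1 == 2 is followed by two (blkno, esc) be64
 * pairs and then the two data blocks themselves. For a pair like
 * (0x1234, 1), the block is copied to its in-place buffer and its
 * first 32 bits are rewritten to GFS2_MAGIC, undoing the escaping done
 * in gfs2_before_commit(); revoked blocks are skipped entirely.
 */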
/* FIXME: sort out accounting for log blocks etc. */

static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_inode_metasync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	/* data sync? */
	gfs2_inode_metasync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
		jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
}

static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head;
	struct gfs2_bufdata *bd;

	if (tr == NULL)
		return;

	head = &tr->tr_databuf;
	while (!list_empty(head)) {
		bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gfs2_unpin(sdp, bd->bd_bh, tr);
	}
}

static const struct gfs2_log_operations gfs2_buf_lops = {
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf",
};

static const struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_before_commit = revoke_lo_before_commit,
	.lo_after_commit = revoke_lo_after_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke",
};

static const struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_before_commit = databuf_lo_before_commit,
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
	.lo_name = "databuf",
};

const struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_databuf_lops,
	&gfs2_buf_lops,
	&gfs2_revoke_lops,
	NULL,
};