log.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
  4. * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
  5. */
  6. #include <linux/sched.h>
  7. #include <linux/slab.h>
  8. #include <linux/spinlock.h>
  9. #include <linux/completion.h>
  10. #include <linux/buffer_head.h>
  11. #include <linux/gfs2_ondisk.h>
  12. #include <linux/crc32.h>
  13. #include <linux/crc32c.h>
  14. #include <linux/delay.h>
  15. #include <linux/kthread.h>
  16. #include <linux/freezer.h>
  17. #include <linux/bio.h>
  18. #include <linux/blkdev.h>
  19. #include <linux/writeback.h>
  20. #include <linux/list_sort.h>
  21. #include "gfs2.h"
  22. #include "incore.h"
  23. #include "bmap.h"
  24. #include "glock.h"
  25. #include "log.h"
  26. #include "lops.h"
  27. #include "meta_io.h"
  28. #include "util.h"
  29. #include "dir.h"
  30. #include "trace_gfs2.h"
  31. #include "trans.h"
  32. static void gfs2_log_shutdown(struct gfs2_sbd *sdp);
  33. /**
  34. * gfs2_struct2blk - compute stuff
  35. * @sdp: the filesystem
  36. * @nstruct: the number of structures
  37. *
  38. * Compute the number of log descriptor blocks needed to hold a certain number
  39. * of structures of a certain size.
  40. *
  41. * Returns: the number of blocks needed (minimum is always 1)
  42. */
  43. unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct)
  44. {
  45. unsigned int blks;
  46. unsigned int first, second;
  47. /* The initial struct gfs2_log_descriptor block */
  48. blks = 1;
  49. first = sdp->sd_ldptrs;
  50. if (nstruct > first) {
  51. /* Subsequent struct gfs2_meta_header blocks */
  52. second = sdp->sd_inptrs;
  53. blks += DIV_ROUND_UP(nstruct - first, second);
  54. }
  55. return blks;
  56. }
/**
 * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
 * @bd: The gfs2_bufdata to remove
 *
 * The ail lock _must_ be held when calling this function
 *
 */
void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
{
	/* Detach from the owning transaction and from both AIL lists. */
	bd->bd_tr = NULL;
	list_del_init(&bd->bd_ail_st_list);
	list_del_init(&bd->bd_ail_gl_list);
	/* One fewer AIL buffer is now held against this glock. */
	atomic_dec(&bd->bd_gl->gl_ail_count);
	/* Drop the buffer_head reference the AIL entry was holding. */
	brelse(bd->bd_bh);
}
  72. static int __gfs2_writepage(struct page *page, struct writeback_control *wbc,
  73. void *data)
  74. {
  75. struct address_space *mapping = data;
  76. int ret = mapping->a_ops->writepage(page, wbc);
  77. mapping_set_error(mapping, ret);
  78. return ret;
  79. }
/**
 * gfs2_ail1_start_one - Start I/O on a transaction
 * @sdp: The superblock
 * @wbc: The writeback control structure
 * @tr: The transaction to start I/O on
 * @plug: The block plug currently active
 *
 * Walks the transaction's ail1 list in reverse, retiring clean up-to-date
 * buffers to ail2 and starting writeback on dirty ones.  The ail lock is
 * dropped around write_cache_pages() and re-acquired afterwards; because
 * the list may have changed while unlocked, -EBUSY is returned to tell
 * the caller to restart the scan.
 */
static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
			       struct writeback_control *wbc,
			       struct gfs2_trans *tr, struct blk_plug *plug)
__releases(&sdp->sd_ail_lock)
__acquires(&sdp->sd_ail_lock)
{
	struct gfs2_glock *gl = NULL;
	struct address_space *mapping;
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;
	int ret = 0;

	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) {
		bh = bd->bd_bh;
		gfs2_assert(sdp, bd->bd_tr == tr);
		if (!buffer_busy(bh)) {
			if (buffer_uptodate(bh)) {
				/* Written and stable: retire to ail2. */
				list_move(&bd->bd_ail_st_list,
					  &tr->tr_ail2_list);
				continue;
			}
			/*
			 * Writeback failed: record the first I/O error and
			 * schedule a delayed withdraw of the filesystem.
			 */
			if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
				gfs2_io_error_bh(sdp, bh);
				gfs2_withdraw_delayed(sdp);
			}
		}
		if (gfs2_withdrawn(sdp)) {
			/* Filesystem withdrawn: just drop the entry. */
			gfs2_remove_from_ail(bd);
			continue;
		}
		if (!buffer_dirty(bh))
			continue;
		/* Skip buffers for the glock we just wrote out. */
		if (gl == bd->bd_gl)
			continue;
		gl = bd->bd_gl;
		/* Keep the entry on ail1; writeback has only been started. */
		list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
		mapping = bh->b_page->mapping;
		if (!mapping)
			continue;
		spin_unlock(&sdp->sd_ail_lock);
		ret = write_cache_pages(mapping, wbc, __gfs2_writepage, mapping);
		if (need_resched()) {
			/* Flush queued I/O before yielding the CPU. */
			blk_finish_plug(plug);
			cond_resched();
			blk_start_plug(plug);
		}
		spin_lock(&sdp->sd_ail_lock);
		if (ret == -ENODATA) /* if a jdata write into a new hole */
			ret = 0; /* ignore it */
		if (ret || wbc->nr_to_write <= 0)
			break;
		/* The list may have changed while unlocked: restart scan. */
		return -EBUSY;
	}

	return ret;
}
/*
 * dump_ail_list - log the state of every buffer on the ail1 lists
 * @sdp: the filesystem
 *
 * Debug helper used when ail1 flushing appears stuck: prints each
 * gfs2_bufdata together with its buffer_head state flags.
 */
static void dump_ail_list(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;
	struct gfs2_bufdata *bd;
	struct buffer_head *bh;

	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
		list_for_each_entry_reverse(bd, &tr->tr_ail1_list,
					    bd_ail_st_list) {
			bh = bd->bd_bh;
			fs_err(sdp, "bd %p: blk:0x%llx bh=%p ", bd,
			       (unsigned long long)bd->bd_blkno, bh);
			if (!bh) {
				fs_err(sdp, "\n");
				continue;
			}
			fs_err(sdp, "0x%llx up2:%d dirt:%d lkd:%d req:%d "
			       "map:%d new:%d ar:%d aw:%d delay:%d "
			       "io err:%d unwritten:%d dfr:%d pin:%d esc:%d\n",
			       (unsigned long long)bh->b_blocknr,
			       buffer_uptodate(bh), buffer_dirty(bh),
			       buffer_locked(bh), buffer_req(bh),
			       buffer_mapped(bh), buffer_new(bh),
			       buffer_async_read(bh), buffer_async_write(bh),
			       buffer_delay(bh), buffer_write_io_error(bh),
			       buffer_unwritten(bh),
			       buffer_defer_completion(bh),
			       buffer_pinned(bh), buffer_escaped(bh));
		}
	}
}
/**
 * gfs2_ail1_flush - start writeback of some ail1 entries
 * @sdp: The super block
 * @wbc: The writeback control structure
 *
 * Writes back some ail1 entries, according to the limits in the
 * writeback control structure
 */
void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
{
	struct list_head *head = &sdp->sd_ail1_list;
	struct gfs2_trans *tr;
	struct blk_plug plug;
	int ret;
	unsigned long flush_start = jiffies;

	trace_gfs2_ail_flush(sdp, wbc, 1);
	blk_start_plug(&plug);
	spin_lock(&sdp->sd_ail_lock);
restart:
	ret = 0;
	/*
	 * Watchdog: if we have been looping here for ten minutes something
	 * is badly wrong - dump the ail state and give up.
	 */
	if (time_after(jiffies, flush_start + (HZ * 600))) {
		fs_err(sdp, "Error: In %s for ten minutes! t=%d\n",
		       __func__, current->journal_info ? 1 : 0);
		dump_ail_list(sdp);
		goto out;
	}
	list_for_each_entry_reverse(tr, head, tr_list) {
		if (wbc->nr_to_write <= 0)
			break;
		ret = gfs2_ail1_start_one(sdp, wbc, tr, &plug);
		if (ret) {
			/* -EBUSY: the ail lock was dropped, list may have
			   changed - rescan from the start. */
			if (ret == -EBUSY)
				goto restart;
			break;
		}
	}
out:
	spin_unlock(&sdp->sd_ail_lock);
	blk_finish_plug(&plug);
	if (ret) {
		gfs2_lm(sdp, "gfs2_ail1_start_one returned: %d\n", ret);
		gfs2_withdraw(sdp);
	}
	trace_gfs2_ail_flush(sdp, wbc, 0);
}
  216. /**
  217. * gfs2_ail1_start - start writeback of all ail1 entries
  218. * @sdp: The superblock
  219. */
  220. static void gfs2_ail1_start(struct gfs2_sbd *sdp)
  221. {
  222. struct writeback_control wbc = {
  223. .sync_mode = WB_SYNC_NONE,
  224. .nr_to_write = LONG_MAX,
  225. .range_start = 0,
  226. .range_end = LLONG_MAX,
  227. };
  228. return gfs2_ail1_flush(sdp, &wbc);
  229. }
  230. static void gfs2_log_update_flush_tail(struct gfs2_sbd *sdp)
  231. {
  232. unsigned int new_flush_tail = sdp->sd_log_head;
  233. struct gfs2_trans *tr;
  234. if (!list_empty(&sdp->sd_ail1_list)) {
  235. tr = list_last_entry(&sdp->sd_ail1_list,
  236. struct gfs2_trans, tr_list);
  237. new_flush_tail = tr->tr_first;
  238. }
  239. sdp->sd_log_flush_tail = new_flush_tail;
  240. }
/*
 * Advance the log head to the flush head.  If the log was fully flushed
 * (flush tail caught up with the old head), the flush tail moves along
 * with the new head.  Note: the comparison must use the old head, so it
 * happens before sd_log_head is updated.
 */
static void gfs2_log_update_head(struct gfs2_sbd *sdp)
{
	unsigned int new_head = sdp->sd_log_flush_head;

	if (sdp->sd_log_flush_tail == sdp->sd_log_head)
		sdp->sd_log_flush_tail = new_head;
	sdp->sd_log_head = new_head;
}
/*
 * gfs2_ail_empty_tr - empty one of the ail lists of a transaction
 *
 * Pops every gfs2_bufdata off @head (one of the transaction's ail lists)
 * and removes it from the AIL entirely.  The ail lock must be held.
 */
static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
			      struct list_head *head)
{
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_first_entry(head, struct gfs2_bufdata,
				      bd_ail_st_list);
		/* Every entry on the list must belong to @tr. */
		gfs2_assert(sdp, bd->bd_tr == tr);
		gfs2_remove_from_ail(bd);
	}
}
/**
 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
 * @sdp: the filesystem
 * @tr: the transaction
 * @max_revokes: If nonzero, issue revokes for the bd items for written buffers
 *
 * returns: the transaction's count of remaining active items
 */
static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
			       int *max_revokes)
{
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;
	int active_count = 0;

	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,
					 bd_ail_st_list) {
		bh = bd->bd_bh;
		gfs2_assert(sdp, bd->bd_tr == tr);
		/*
		 * If another process flagged an io error, e.g. writing to the
		 * journal, error all other bhs and move them off the ail1 to
		 * prevent a tight loop when unmount tries to flush ail1,
		 * regardless of whether they're still busy. If no outside
		 * errors were found and the buffer is busy, move to the next.
		 * If the ail buffer is not busy and caught an error, flag it
		 * for others.
		 */
		if (!sdp->sd_log_error && buffer_busy(bh)) {
			/* Still under I/O: count it and leave it on ail1. */
			active_count++;
			continue;
		}
		if (!buffer_uptodate(bh) &&
		    !cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
			gfs2_io_error_bh(sdp, bh);
			gfs2_withdraw_delayed(sdp);
		}
		/*
		 * If we have space for revokes and the bd is no longer on any
		 * buf list, we can just add a revoke for it immediately and
		 * avoid having to put it on the ail2 list, where it would need
		 * to be revoked later.
		 */
		if (*max_revokes && list_empty(&bd->bd_list)) {
			gfs2_add_revoke(sdp, bd);
			(*max_revokes)--;
			continue;
		}
		/* Written back: retire the entry to the ail2 list. */
		list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
	}
	return active_count;
}
/**
 * gfs2_ail1_empty - Try to empty the ail1 lists
 * @sdp: The superblock
 * @max_revokes: If non-zero, add revokes where appropriate
 *
 * Tries to empty the ail1 lists, starting with the oldest first.
 * Transactions with no remaining active items are moved to ail2; as soon
 * as one transaction still has active items, younger ones are kept on
 * ail1 too, preserving ordering.
 *
 * Returns: true if the ail1 list is now empty
 */
static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
{
	struct gfs2_trans *tr, *s;
	int oldest_tr = 1;
	int ret;

	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
		/* Only fully-written transactions older than any active one
		   may move to ail2. */
		if (!gfs2_ail1_empty_one(sdp, tr, &max_revokes) && oldest_tr)
			list_move(&tr->tr_list, &sdp->sd_ail2_list);
		else
			oldest_tr = 0;
	}
	gfs2_log_update_flush_tail(sdp);
	ret = list_empty(&sdp->sd_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);

	/* An I/O error seen above may have requested a withdraw. */
	if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) {
		gfs2_lm(sdp, "fatal: I/O error(s)\n");
		gfs2_withdraw(sdp);
	}

	return ret;
}
/*
 * gfs2_ail1_wait - wait for I/O on one ail1 buffer
 * @sdp: the filesystem
 *
 * Finds the first locked (in-flight) buffer on the ail1 lists, drops the
 * ail lock, and sleeps until that buffer's I/O completes.  Only waits on
 * a single buffer per call; callers loop as needed.
 */
static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;
	struct gfs2_bufdata *bd;
	struct buffer_head *bh;

	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
		list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) {
			bh = bd->bd_bh;
			if (!buffer_locked(bh))
				continue;
			/* Hold a ref so the bh can't vanish once we drop
			   the ail lock. */
			get_bh(bh);
			spin_unlock(&sdp->sd_ail_lock);
			wait_on_buffer(bh);
			brelse(bh);
			return;
		}
	}
	spin_unlock(&sdp->sd_ail_lock);
}
/*
 * Retire one transaction from the ail2 list: drop all of its ail2
 * entries, unlink it, verify both ail lists are now empty, and free it.
 * Called with the ail lock held.
 */
static void __ail2_empty(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
	list_del(&tr->tr_list);
	gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
	gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
	gfs2_trans_free(sdp, tr);
}
/*
 * ail2_empty - free ail2 transactions whose journal space is being reclaimed
 * @sdp: the filesystem
 * @new_tail: the new log tail (journal block number)
 *
 * Frees every ail2 transaction whose first block lies in the half-open
 * interval [old_tail, new_tail) of the circular journal.  The two
 * branches handle the interval respectively not wrapping / wrapping
 * around the end of the journal.
 */
static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
{
	struct list_head *ail2_list = &sdp->sd_ail2_list;
	unsigned int old_tail = sdp->sd_log_tail;
	struct gfs2_trans *tr, *safe;

	spin_lock(&sdp->sd_ail_lock);
	if (old_tail <= new_tail) {
		/* Interval does not wrap. */
		list_for_each_entry_safe(tr, safe, ail2_list, tr_list) {
			if (old_tail <= tr->tr_first && tr->tr_first < new_tail)
				__ail2_empty(sdp, tr);
		}
	} else {
		/* Interval wraps past the end of the journal. */
		list_for_each_entry_safe(tr, safe, ail2_list, tr_list) {
			if (old_tail <= tr->tr_first || tr->tr_first < new_tail)
				__ail2_empty(sdp, tr);
		}
	}
	spin_unlock(&sdp->sd_ail_lock);
}
  388. /**
  389. * gfs2_log_is_empty - Check if the log is empty
  390. * @sdp: The GFS2 superblock
  391. */
  392. bool gfs2_log_is_empty(struct gfs2_sbd *sdp) {
  393. return atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks;
  394. }
/*
 * Try to claim @revokes slots from the pool of already-reserved revoke
 * space, without blocking.  Standard atomic_try_cmpxchg loop: on failure
 * @available is refreshed with the current value and we retry while
 * there is still enough room.
 *
 * Returns: true if the slots were claimed, false if there weren't enough.
 */
static bool __gfs2_log_try_reserve_revokes(struct gfs2_sbd *sdp, unsigned int revokes)
{
	unsigned int available;

	available = atomic_read(&sdp->sd_log_revokes_available);
	while (available >= revokes) {
		if (atomic_try_cmpxchg(&sdp->sd_log_revokes_available,
				       &available, available - revokes))
			return true;
	}
	return false;
}
  406. /**
  407. * gfs2_log_release_revokes - Release a given number of revokes
  408. * @sdp: The GFS2 superblock
  409. * @revokes: The number of revokes to release
  410. *
  411. * sdp->sd_log_flush_lock must be held.
  412. */
  413. void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes)
  414. {
  415. if (revokes)
  416. atomic_add(revokes, &sdp->sd_log_revokes_available);
  417. }
/**
 * gfs2_log_release - Release a given number of log blocks
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks
 *
 * Returns @blks to the pool of free journal blocks and wakes any
 * processes waiting for log space.
 */
void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
{
	atomic_add(blks, &sdp->sd_log_blks_free);
	trace_gfs2_log_blocks(sdp, blks);
	/* Free blocks can never exceed the journal size. */
	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
				  sdp->sd_jdesc->jd_blocks);
	/* Wake reservers blocked in __gfs2_log_reserve(), if any. */
	if (atomic_read(&sdp->sd_log_blks_needed))
		wake_up(&sdp->sd_log_waitq);
}
/**
 * __gfs2_log_try_reserve - Try to make a log reservation
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 * @taboo_blks: The number of blocks to leave free
 *
 * Try to do the same as __gfs2_log_reserve(), but fail if no more log
 * space is immediately available.
 */
static bool __gfs2_log_try_reserve(struct gfs2_sbd *sdp, unsigned int blks,
				   unsigned int taboo_blks)
{
	unsigned wanted = blks + taboo_blks;
	unsigned int free_blocks;

	free_blocks = atomic_read(&sdp->sd_log_blks_free);
	/*
	 * Proceed only while @wanted blocks are free, but subtract just
	 * @blks - the @taboo_blks stay free for the final log flush.
	 * atomic_try_cmpxchg refreshes free_blocks on failure.
	 */
	while (free_blocks >= wanted) {
		if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks,
				       free_blocks - blks)) {
			trace_gfs2_log_blocks(sdp, -blks);
			return true;
		}
	}
	return false;
}
/**
 * __gfs2_log_reserve - Make a log reservation
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 * @taboo_blks: The number of blocks to leave free
 *
 * @taboo_blks is set to 0 for logd, and to GFS2_LOG_FLUSH_MIN_BLOCKS
 * for all other processes. This ensures that when the log is almost full,
 * logd will still be able to call gfs2_log_flush one more time without
 * blocking, which will advance the tail and make some more log space
 * available.
 *
 * We no longer flush the log here, instead we wake up logd to do that
 * for us. To avoid the thundering herd and to ensure that we deal fairly
 * with queued waiters, we use an exclusive wait. This means that when we
 * get woken with enough journal space to get our reservation, we need to
 * wake the next waiter on the list.
 */
static void __gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks,
			       unsigned int taboo_blks)
{
	unsigned wanted = blks + taboo_blks;
	unsigned int free_blocks;

	/* Advertise our need so gfs2_log_release() knows to wake us. */
	atomic_add(blks, &sdp->sd_log_blks_needed);
	for (;;) {
		/* Ask logd to flush the log and free up space. */
		if (current != sdp->sd_logd_process)
			wake_up(&sdp->sd_logd_waitq);
		io_wait_event(sdp->sd_log_waitq,
			(free_blocks = atomic_read(&sdp->sd_log_blks_free),
			 free_blocks >= wanted));
		/* Space looked sufficient; race to actually claim it. */
		do {
			if (atomic_try_cmpxchg(&sdp->sd_log_blks_free,
					       &free_blocks,
					       free_blocks - blks))
				goto reserved;
		} while (free_blocks >= wanted);
	}

reserved:
	trace_gfs2_log_blocks(sdp, -blks);
	/* Pass the wakeup along if other waiters still need space. */
	if (atomic_sub_return(blks, &sdp->sd_log_blks_needed))
		wake_up(&sdp->sd_log_waitq);
}
/**
 * gfs2_log_try_reserve - Try to make a log reservation
 * @sdp: The GFS2 superblock
 * @tr: The transaction
 * @extra_revokes: The number of additional revokes reserved (output)
 *
 * This is similar to gfs2_log_reserve, but sdp->sd_log_flush_lock must be
 * held for correct revoke accounting.
 *
 * Returns: true if the reservation succeeded without blocking.
 */
bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
			  unsigned int *extra_revokes)
{
	unsigned int blks = tr->tr_reserved;
	unsigned int revokes = tr->tr_revokes;
	unsigned int revoke_blks = 0;

	*extra_revokes = 0;
	/* Prefer already-reserved revoke slots; otherwise reserve whole
	   revoke blocks and report the leftover slots to the caller. */
	if (revokes && !__gfs2_log_try_reserve_revokes(sdp, revokes)) {
		revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs);
		*extra_revokes = revoke_blks * sdp->sd_inptrs - revokes;
		blks += revoke_blks;
	}
	if (!blks)
		return true;
	if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS))
		return true;
	/* Block reservation failed: undo the revoke-slot claim. */
	if (!revoke_blks)
		gfs2_log_release_revokes(sdp, revokes);
	return false;
}
  528. /**
  529. * gfs2_log_reserve - Make a log reservation
  530. * @sdp: The GFS2 superblock
  531. * @tr: The transaction
  532. * @extra_revokes: The number of additional revokes reserved (output)
  533. *
  534. * sdp->sd_log_flush_lock must not be held.
  535. */
  536. void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
  537. unsigned int *extra_revokes)
  538. {
  539. unsigned int blks = tr->tr_reserved;
  540. unsigned int revokes = tr->tr_revokes;
  541. unsigned int revoke_blks;
  542. *extra_revokes = 0;
  543. if (revokes) {
  544. revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs);
  545. *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes;
  546. blks += revoke_blks;
  547. }
  548. __gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS);
  549. }
  550. /**
  551. * log_distance - Compute distance between two journal blocks
  552. * @sdp: The GFS2 superblock
  553. * @newer: The most recent journal block of the pair
  554. * @older: The older journal block of the pair
  555. *
  556. * Compute the distance (in the journal direction) between two
  557. * blocks in the journal
  558. *
  559. * Returns: the distance in blocks
  560. */
  561. static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer,
  562. unsigned int older)
  563. {
  564. int dist;
  565. dist = newer - older;
  566. if (dist < 0)
  567. dist += sdp->sd_jdesc->jd_blocks;
  568. return dist;
  569. }
  570. /**
  571. * calc_reserved - Calculate the number of blocks to keep reserved
  572. * @sdp: The GFS2 superblock
  573. *
  574. * This is complex. We need to reserve room for all our currently used
  575. * metadata blocks (e.g. normal file I/O rewriting file time stamps) and
  576. * all our journaled data blocks for journaled files (e.g. files in the
  577. * meta_fs like rindex, or files for which chattr +j was done.)
  578. * If we don't reserve enough space, corruption will follow.
  579. *
  580. * We can have metadata blocks and jdata blocks in the same journal. Each
  581. * type gets its own log descriptor, for which we need to reserve a block.
  582. * In fact, each type has the potential for needing more than one log descriptor
  583. * in cases where we have more blocks than will fit in a log descriptor.
  584. * Metadata journal entries take up half the space of journaled buffer entries.
  585. *
  586. * Also, we need to reserve blocks for revoke journal entries and one for an
  587. * overall header for the lot.
  588. *
  589. * Returns: the number of blocks reserved
  590. */
  591. static unsigned int calc_reserved(struct gfs2_sbd *sdp)
  592. {
  593. unsigned int reserved = GFS2_LOG_FLUSH_MIN_BLOCKS;
  594. unsigned int blocks;
  595. struct gfs2_trans *tr = sdp->sd_log_tr;
  596. if (tr) {
  597. blocks = tr->tr_num_buf_new - tr->tr_num_buf_rm;
  598. reserved += blocks + DIV_ROUND_UP(blocks, buf_limit(sdp));
  599. blocks = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
  600. reserved += blocks + DIV_ROUND_UP(blocks, databuf_limit(sdp));
  601. }
  602. return reserved;
  603. }
/*
 * log_pull_tail - advance the log tail to the flush tail
 * @sdp: the filesystem
 *
 * Frees the ail2 transactions in the reclaimed region and returns the
 * corresponding journal blocks to the free pool.
 */
static void log_pull_tail(struct gfs2_sbd *sdp)
{
	unsigned int new_tail = sdp->sd_log_flush_tail;
	unsigned int dist;

	if (new_tail == sdp->sd_log_tail)
		return;
	dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
	ail2_empty(sdp, new_tail);
	gfs2_log_release(sdp, dist);
	sdp->sd_log_tail = new_tail;
}
/*
 * log_flush_wait - wait for all in-flight log I/O to complete
 * @sdp: the filesystem
 *
 * Standard prepare_to_wait()/io_schedule() loop on sd_log_flush_wait,
 * re-checking sd_log_in_flight after every wakeup.
 */
void log_flush_wait(struct gfs2_sbd *sdp)
{
	DEFINE_WAIT(wait);

	if (atomic_read(&sdp->sd_log_in_flight)) {
		do {
			prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			if (atomic_read(&sdp->sd_log_in_flight))
				io_schedule();
		} while(atomic_read(&sdp->sd_log_in_flight));
		finish_wait(&sdp->sd_log_flush_wait, &wait);
	}
}
  628. static int ip_cmp(void *priv, const struct list_head *a, const struct list_head *b)
  629. {
  630. struct gfs2_inode *ipa, *ipb;
  631. ipa = list_entry(a, struct gfs2_inode, i_ordered);
  632. ipb = list_entry(b, struct gfs2_inode, i_ordered);
  633. if (ipa->i_no_addr < ipb->i_no_addr)
  634. return -1;
  635. if (ipa->i_no_addr > ipb->i_no_addr)
  636. return 1;
  637. return 0;
  638. }
/*
 * Remove an inode from the ordered-write list, if it is on it.
 * Caller must hold sd_ordered_lock.
 */
static void __ordered_del_inode(struct gfs2_inode *ip)
{
	if (!list_empty(&ip->i_ordered))
		list_del_init(&ip->i_ordered);
}
/*
 * gfs2_ordered_write - start writeback of all ordered-write inodes
 * @sdp: the filesystem
 *
 * Sorts the ordered list by inode address (for better I/O ordering) and
 * kicks off data writeback for each inode with dirty pages.  The ordered
 * lock is dropped around filemap_fdatawrite(); processed inodes are
 * parked on a local list and spliced back at the end so the main list
 * walk terminates.
 */
static void gfs2_ordered_write(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip;
	LIST_HEAD(written);

	spin_lock(&sdp->sd_ordered_lock);
	list_sort(NULL, &sdp->sd_log_ordered, &ip_cmp);
	while (!list_empty(&sdp->sd_log_ordered)) {
		ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered);
		if (ip->i_inode.i_mapping->nrpages == 0) {
			/* Nothing cached: no writeback needed. */
			__ordered_del_inode(ip);
			continue;
		}
		list_move(&ip->i_ordered, &written);
		spin_unlock(&sdp->sd_ordered_lock);
		filemap_fdatawrite(ip->i_inode.i_mapping);
		spin_lock(&sdp->sd_ordered_lock);
	}
	list_splice(&written, &sdp->sd_log_ordered);
	spin_unlock(&sdp->sd_ordered_lock);
}
/*
 * gfs2_ordered_wait - wait for ordered-write data to reach disk
 * @sdp: the filesystem
 *
 * Drains the ordered list, waiting for outstanding data writeback on
 * each inode.  The ordered lock is dropped around filemap_fdatawait();
 * each inode is removed from the list before the lock is released.
 */
static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip;

	spin_lock(&sdp->sd_ordered_lock);
	while (!list_empty(&sdp->sd_log_ordered)) {
		ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered);
		__ordered_del_inode(ip);
		if (ip->i_inode.i_mapping->nrpages == 0)
			continue;
		spin_unlock(&sdp->sd_ordered_lock);
		filemap_fdatawait(ip->i_inode.i_mapping);
		spin_lock(&sdp->sd_ordered_lock);
	}
	spin_unlock(&sdp->sd_ordered_lock);
}
/*
 * gfs2_ordered_del_inode - remove an inode from the ordered-write list
 * @ip: the inode
 *
 * Locked wrapper around __ordered_del_inode().
 */
void gfs2_ordered_del_inode(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

	spin_lock(&sdp->sd_ordered_lock);
	__ordered_del_inode(ip);
	spin_unlock(&sdp->sd_ordered_lock);
}
/*
 * gfs2_add_revoke - turn an AIL entry into a queued revoke
 * @sdp: the filesystem
 * @bd: the gfs2_bufdata to revoke
 *
 * Removes @bd from the AIL and queues it on sd_log_revokes instead.
 * Note the ordering: bd_blkno must be saved before gfs2_remove_from_ail()
 * drops the bh reference, after which bd_bh is cleared.
 */
void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
	struct buffer_head *bh = bd->bd_bh;
	struct gfs2_glock *gl = bd->bd_gl;

	sdp->sd_log_num_revoke++;
	/* First revoke against this glock: take a glock reference. */
	if (atomic_inc_return(&gl->gl_revokes) == 1)
		gfs2_glock_hold(gl);
	bh->b_private = NULL;
	bd->bd_blkno = bh->b_blocknr;
	gfs2_remove_from_ail(bd); /* drops ref on bh */
	bd->bd_bh = NULL;
	set_bit(GLF_LFLUSH, &gl->gl_flags);
	list_add(&bd->bd_list, &sdp->sd_log_revokes);
}
  700. void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
  701. {
  702. if (atomic_dec_return(&gl->gl_revokes) == 0) {
  703. clear_bit(GLF_LFLUSH, &gl->gl_flags);
  704. gfs2_glock_queue_put(gl);
  705. }
  706. }
/**
 * gfs2_flush_revokes - Add as many revokes to the system transaction as we can
 * @sdp: The GFS2 superblock
 *
 * Our usual strategy is to defer writing revokes as much as we can in the hope
 * that we'll eventually overwrite the journal, which will make those revokes
 * go away. This changes when we flush the log: at that point, there will
 * likely be some left-over space in the last revoke block of that transaction.
 * We can fill that space with additional revokes for blocks that have already
 * been written back. This will basically come at no cost now, and will save
 * us from having to keep track of those blocks on the AIL2 list later.
 */
void gfs2_flush_revokes(struct gfs2_sbd *sdp)
{
	/* number of revokes we still have room for */
	unsigned int max_revokes = atomic_read(&sdp->sd_log_revokes_available);

	gfs2_log_lock(sdp);
	/* Emptying ail1 issues the revokes as a side effect. */
	gfs2_ail1_empty(sdp, max_revokes);
	gfs2_log_unlock(sdp);
}
/**
 * gfs2_write_log_header - Write a journal log header buffer at lblock
 * @sdp: The GFS2 superblock
 * @jd: journal descriptor of the journal to which we are writing
 * @seq: sequence number
 * @tail: tail of the log
 * @lblock: value for lh_blkno (block number relative to start of journal)
 * @flags: log header flags GFS2_LOG_HEAD_*
 * @op_flags: flags to pass to the bio
 *
 * Fills in a struct gfs2_log_header on a freshly-zeroed page, computes
 * both the legacy crc32 over the v1 header and the crc32c over the rest
 * of the block, and submits it for writing.
 */
void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
			   u64 seq, u32 tail, u32 lblock, u32 flags,
			   blk_opf_t op_flags)
{
	struct gfs2_log_header *lh;
	u32 hash, crc;
	struct page *page;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct timespec64 tv;
	struct super_block *sb = sdp->sd_vfs;
	u64 dblock;

	if (gfs2_withdrawn(sdp))
		return;

	page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
	lh = page_address(page);
	clear_page(lh);

	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
	lh->lh_header.__pad0 = cpu_to_be64(0);
	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
	lh->lh_sequence = cpu_to_be64(seq);
	lh->lh_flags = cpu_to_be32(flags);
	lh->lh_tail = cpu_to_be32(tail);
	lh->lh_blkno = cpu_to_be32(lblock);
	/* Legacy v1 hash covers only the first LH_V1_SIZE bytes. */
	hash = ~crc32(~0, lh, LH_V1_SIZE);
	lh->lh_hash = cpu_to_be32(hash);

	ktime_get_coarse_real_ts64(&tv);
	lh->lh_nsec = cpu_to_be32(tv.tv_nsec);
	lh->lh_sec = cpu_to_be64(tv.tv_sec);
	/* Map the journal-relative lblock to an on-disk block number. */
	if (!list_empty(&jd->extent_list))
		dblock = gfs2_log_bmap(jd, lblock);
	else {
		unsigned int extlen;
		int ret;

		extlen = 1;
		ret = gfs2_get_extent(jd->jd_inode, lblock, &dblock, &extlen);
		if (gfs2_assert_withdraw(sdp, ret == 0))
			return;
	}
	lh->lh_addr = cpu_to_be64(dblock);
	lh->lh_jinode = cpu_to_be64(GFS2_I(jd->jd_inode)->i_no_addr);

	/* We may only write local statfs, quota, etc., when writing to our
	   own journal. The values are left 0 when recovering a journal
	   different from our own. */
	if (!(flags & GFS2_LOG_HEAD_RECOVERY)) {
		lh->lh_statfs_addr =
			cpu_to_be64(GFS2_I(sdp->sd_sc_inode)->i_no_addr);
		lh->lh_quota_addr =
			cpu_to_be64(GFS2_I(sdp->sd_qc_inode)->i_no_addr);

		spin_lock(&sdp->sd_statfs_spin);
		lh->lh_local_total = cpu_to_be64(l_sc->sc_total);
		lh->lh_local_free = cpu_to_be64(l_sc->sc_free);
		lh->lh_local_dinodes = cpu_to_be64(l_sc->sc_dinodes);
		spin_unlock(&sdp->sd_statfs_spin);
	}

	BUILD_BUG_ON(offsetof(struct gfs2_log_header, lh_crc) != LH_V1_SIZE);

	/* crc32c covers everything after the v1 header and the crc field
	   itself (hence the +4 / -4). */
	crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
		     sb->s_blocksize - LH_V1_SIZE - 4);
	lh->lh_crc = cpu_to_be32(crc);

	gfs2_log_write(sdp, jd, page, sb->s_blocksize, 0, dblock);
	gfs2_log_submit_bio(&jd->jd_log_bio, REQ_OP_WRITE | op_flags);
}
/**
 * log_write_header - Get and initialize a journal header buffer
 * @sdp: The GFS2 superblock
 * @flags: The log header flags, including log header origin
 *
 * Writes the next log header for our own journal at the current flush head,
 * then advances the head and pulls the tail forward.  Must not be called on
 * a frozen filesystem.
 */
static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
{
	blk_opf_t op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);

	gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));

	/* Without device barriers we cannot rely on PREFLUSH/FUA ordering,
	   so wait for ordered and in-flight log writes explicitly before
	   issuing the header. */
	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
		gfs2_ordered_wait(sdp);
		log_flush_wait(sdp);
		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
	}
	/* Log is idle when there is nothing between the flush tail and head. */
	sdp->sd_log_idle = (sdp->sd_log_flush_tail == sdp->sd_log_flush_head);
	gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++,
			      sdp->sd_log_flush_tail, sdp->sd_log_flush_head,
			      flags, op_flags);
	gfs2_log_incr_head(sdp);
	log_flush_wait(sdp);
	log_pull_tail(sdp);
	gfs2_log_update_head(sdp);
}
  828. /**
  829. * gfs2_ail_drain - drain the ail lists after a withdraw
  830. * @sdp: Pointer to GFS2 superblock
  831. */
  832. void gfs2_ail_drain(struct gfs2_sbd *sdp)
  833. {
  834. struct gfs2_trans *tr;
  835. spin_lock(&sdp->sd_ail_lock);
  836. /*
  837. * For transactions on the sd_ail1_list we need to drain both the
  838. * ail1 and ail2 lists. That's because function gfs2_ail1_start_one
  839. * (temporarily) moves items from its tr_ail1 list to tr_ail2 list
  840. * before revokes are sent for that block. Items on the sd_ail2_list
  841. * should have already gotten beyond that point, so no need.
  842. */
  843. while (!list_empty(&sdp->sd_ail1_list)) {
  844. tr = list_first_entry(&sdp->sd_ail1_list, struct gfs2_trans,
  845. tr_list);
  846. gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list);
  847. gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
  848. list_del(&tr->tr_list);
  849. gfs2_trans_free(sdp, tr);
  850. }
  851. while (!list_empty(&sdp->sd_ail2_list)) {
  852. tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans,
  853. tr_list);
  854. gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
  855. list_del(&tr->tr_list);
  856. gfs2_trans_free(sdp, tr);
  857. }
  858. gfs2_drain_revokes(sdp);
  859. spin_unlock(&sdp->sd_ail_lock);
  860. }
  861. /**
  862. * empty_ail1_list - try to start IO and empty the ail1 list
  863. * @sdp: Pointer to GFS2 superblock
  864. */
  865. static void empty_ail1_list(struct gfs2_sbd *sdp)
  866. {
  867. unsigned long start = jiffies;
  868. for (;;) {
  869. if (time_after(jiffies, start + (HZ * 600))) {
  870. fs_err(sdp, "Error: In %s for 10 minutes! t=%d\n",
  871. __func__, current->journal_info ? 1 : 0);
  872. dump_ail_list(sdp);
  873. return;
  874. }
  875. gfs2_ail1_start(sdp);
  876. gfs2_ail1_wait(sdp);
  877. if (gfs2_ail1_empty(sdp, 0))
  878. return;
  879. }
  880. }
  881. /**
  882. * trans_drain - drain the buf and databuf queue for a failed transaction
  883. * @tr: the transaction to drain
  884. *
  885. * When this is called, we're taking an error exit for a log write that failed
  886. * but since we bypassed the after_commit functions, we need to remove the
  887. * items from the buf and databuf queue.
  888. */
  889. static void trans_drain(struct gfs2_trans *tr)
  890. {
  891. struct gfs2_bufdata *bd;
  892. struct list_head *head;
  893. if (!tr)
  894. return;
  895. head = &tr->tr_buf;
  896. while (!list_empty(head)) {
  897. bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
  898. list_del_init(&bd->bd_list);
  899. if (!list_empty(&bd->bd_ail_st_list))
  900. gfs2_remove_from_ail(bd);
  901. kmem_cache_free(gfs2_bufdata_cachep, bd);
  902. }
  903. head = &tr->tr_databuf;
  904. while (!list_empty(head)) {
  905. bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
  906. list_del_init(&bd->bd_list);
  907. if (!list_empty(&bd->bd_ail_st_list))
  908. gfs2_remove_from_ail(bd);
  909. kmem_cache_free(gfs2_bufdata_cachep, bd);
  910. }
  911. }
/**
 * gfs2_log_flush - flush incore transaction(s)
 * @sdp: The filesystem
 * @gl: The glock structure to flush. If NULL, flush the whole incore log
 * @flags: The log header flags: GFS2_LOG_HEAD_FLUSH_* and debug flags
 *
 * Serialized by sd_log_flush_lock (taken for writing here).
 */
void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
{
	struct gfs2_trans *tr = NULL;
	unsigned int reserved_blocks = 0, used_blocks = 0;
	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
	unsigned int first_log_head;
	unsigned int reserved_revokes = 0;

	down_write(&sdp->sd_log_flush_lock);
	trace_gfs2_log_flush(sdp, 1, flags);

repeat:
	/*
	 * Do this check while holding the log_flush_lock to prevent new
	 * buffers from being added to the ail via gfs2_pin()
	 */
	if (gfs2_withdrawn(sdp) || !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
		goto out;

	/* Log might have been flushed while we waited for the flush lock */
	if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags))
		goto out;

	first_log_head = sdp->sd_log_head;
	sdp->sd_log_flush_head = first_log_head;

	tr = sdp->sd_log_tr;
	if (tr || sdp->sd_log_num_revoke) {
		/* A previous trip through "repeat" may have reserved blocks
		   that a now-present transaction makes redundant. */
		if (reserved_blocks)
			gfs2_log_release(sdp, reserved_blocks);
		reserved_blocks = sdp->sd_log_blks_reserved;
		reserved_revokes = sdp->sd_log_num_revoke;
		if (tr) {
			/* Detach the transaction from the superblock; we own
			   it from here on. */
			sdp->sd_log_tr = NULL;
			tr->tr_first = first_log_head;
			if (unlikely (state == SFS_FROZEN)) {
				if (gfs2_assert_withdraw_delayed(sdp,
				       !tr->tr_num_buf_new && !tr->tr_num_databuf_new))
					goto out_withdraw;
			}
		}
	} else if (!reserved_blocks) {
		unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS;

		reserved_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS;
		/* logd itself is exempt from the taboo-block restriction. */
		if (current == sdp->sd_logd_process)
			taboo_blocks = 0;

		if (!__gfs2_log_try_reserve(sdp, reserved_blocks, taboo_blocks)) {
			/* Must drop the flush lock while blocking on the
			   reservation, then revalidate everything. */
			up_write(&sdp->sd_log_flush_lock);
			__gfs2_log_reserve(sdp, reserved_blocks, taboo_blocks);
			down_write(&sdp->sd_log_flush_lock);
			goto repeat;
		}
		BUG_ON(sdp->sd_log_num_revoke);
	}

	if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
		clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

	if (unlikely(state == SFS_FROZEN))
		if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes))
			goto out_withdraw;

	/* Write ordered data, then the log, re-checking for withdraw after
	   each step that can fail. */
	gfs2_ordered_write(sdp);
	if (gfs2_withdrawn(sdp))
		goto out_withdraw;
	lops_before_commit(sdp, tr);
	if (gfs2_withdrawn(sdp))
		goto out_withdraw;
	gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE);
	if (gfs2_withdrawn(sdp))
		goto out_withdraw;

	/* Write a header if we wrote log blocks, or if the tail needs to
	   move and the log isn't idle. */
	if (sdp->sd_log_head != sdp->sd_log_flush_head) {
		log_write_header(sdp, flags);
	} else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) {
		log_write_header(sdp, flags);
	}
	if (gfs2_withdrawn(sdp))
		goto out_withdraw;
	lops_after_commit(sdp, tr);

	gfs2_log_lock(sdp);
	sdp->sd_log_blks_reserved = 0;

	spin_lock(&sdp->sd_ail_lock);
	/* Hand the transaction over to the ail1 list; ownership moves with
	   it, so clear tr to avoid freeing it below. */
	if (tr && !list_empty(&tr->tr_ail1_list)) {
		list_add(&tr->tr_list, &sdp->sd_ail1_list);
		tr = NULL;
	}
	spin_unlock(&sdp->sd_ail_lock);
	gfs2_log_unlock(sdp);

	/* Non-normal flushes (shutdown, freeze, ...) must leave the ail
	   fully drained and may need a second header. */
	if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) {
		if (!sdp->sd_log_idle) {
			empty_ail1_list(sdp);
			if (gfs2_withdrawn(sdp))
				goto out_withdraw;
			log_write_header(sdp, flags);
		}
		if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
			     GFS2_LOG_HEAD_FLUSH_FREEZE))
			gfs2_log_shutdown(sdp);
		if (flags & GFS2_LOG_HEAD_FLUSH_FREEZE)
			atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
	}

out_end:
	/* Settle the block/revoke accounting: convert any leftover revoke
	   capacity back into journal blocks and release what we didn't use. */
	used_blocks = log_distance(sdp, sdp->sd_log_flush_head, first_log_head);
	reserved_revokes += atomic_read(&sdp->sd_log_revokes_available);
	atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs);
	gfs2_assert_withdraw(sdp, reserved_revokes % sdp->sd_inptrs == sdp->sd_ldptrs);
	if (reserved_revokes > sdp->sd_ldptrs)
		reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs;
out:
	if (used_blocks != reserved_blocks) {
		gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks);
		gfs2_log_release(sdp, reserved_blocks - used_blocks);
	}
	up_write(&sdp->sd_log_flush_lock);
	gfs2_trans_free(sdp, tr);
	if (gfs2_withdrawing(sdp))
		gfs2_withdraw(sdp);
	trace_gfs2_log_flush(sdp, 0, flags);
	return;

out_withdraw:
	trans_drain(tr);
	/*
	 * If the tr_list is empty, we're withdrawing during a log
	 * flush that targets a transaction, but the transaction was
	 * never queued onto any of the ail lists. Here we add it to
	 * ail1 just so that ail_drain() will find and free it.
	 */
	spin_lock(&sdp->sd_ail_lock);
	if (tr && list_empty(&tr->tr_list))
		list_add(&tr->tr_list, &sdp->sd_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);
	tr = NULL;
	goto out_end;
}
  1045. /**
  1046. * gfs2_merge_trans - Merge a new transaction into a cached transaction
  1047. * @sdp: the filesystem
  1048. * @new: New transaction to be merged
  1049. */
  1050. static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new)
  1051. {
  1052. struct gfs2_trans *old = sdp->sd_log_tr;
  1053. WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags));
  1054. old->tr_num_buf_new += new->tr_num_buf_new;
  1055. old->tr_num_databuf_new += new->tr_num_databuf_new;
  1056. old->tr_num_buf_rm += new->tr_num_buf_rm;
  1057. old->tr_num_databuf_rm += new->tr_num_databuf_rm;
  1058. old->tr_revokes += new->tr_revokes;
  1059. old->tr_num_revoke += new->tr_num_revoke;
  1060. list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
  1061. list_splice_tail_init(&new->tr_buf, &old->tr_buf);
  1062. spin_lock(&sdp->sd_ail_lock);
  1063. list_splice_tail_init(&new->tr_ail1_list, &old->tr_ail1_list);
  1064. list_splice_tail_init(&new->tr_ail2_list, &old->tr_ail2_list);
  1065. spin_unlock(&sdp->sd_ail_lock);
  1066. }
  1067. static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
  1068. {
  1069. unsigned int reserved;
  1070. unsigned int unused;
  1071. unsigned int maxres;
  1072. gfs2_log_lock(sdp);
  1073. if (sdp->sd_log_tr) {
  1074. gfs2_merge_trans(sdp, tr);
  1075. } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
  1076. gfs2_assert_withdraw(sdp, !test_bit(TR_ONSTACK, &tr->tr_flags));
  1077. sdp->sd_log_tr = tr;
  1078. set_bit(TR_ATTACHED, &tr->tr_flags);
  1079. }
  1080. reserved = calc_reserved(sdp);
  1081. maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
  1082. gfs2_assert_withdraw(sdp, maxres >= reserved);
  1083. unused = maxres - reserved;
  1084. if (unused)
  1085. gfs2_log_release(sdp, unused);
  1086. sdp->sd_log_blks_reserved = reserved;
  1087. gfs2_log_unlock(sdp);
  1088. }
/**
 * gfs2_log_commit - Commit a transaction to the log
 * @sdp: the filesystem
 * @tr: the transaction
 *
 * We wake up gfs2_logd if the number of pinned blocks exceed thresh1
 * or the total number of used blocks (pinned blocks plus AIL blocks)
 * is greater than thresh2.
 *
 * At mount time thresh1 is 2/5ths of journal size, thresh2 is 4/5ths of
 * journal size.
 */
void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	log_refund(sdp, tr);

	/* Nudge logd if either watermark has been crossed. */
	if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
	    ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
	    atomic_read(&sdp->sd_log_thresh2)))
		wake_up(&sdp->sd_logd_waitq);
}
/**
 * gfs2_log_shutdown - write a shutdown header into a journal
 * @sdp: the filesystem
 *
 * Expects the log to be fully quiesced: no reserved blocks, no pending
 * revokes, and an empty ail1 list.
 */
static void gfs2_log_shutdown(struct gfs2_sbd *sdp)
{
	gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));

	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN);
	log_pull_tail(sdp);

	/* After the unmount header, head and tail must coincide and the
	   ail2 list must be empty. */
	gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
	gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
}
  1126. static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
  1127. {
  1128. return (atomic_read(&sdp->sd_log_pinned) +
  1129. atomic_read(&sdp->sd_log_blks_needed) >=
  1130. atomic_read(&sdp->sd_log_thresh1));
  1131. }
  1132. static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
  1133. {
  1134. unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
  1135. return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
  1136. atomic_read(&sdp->sd_log_thresh2);
  1137. }
/**
 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
 * @data: Pointer to GFS2 superblock
 *
 * Also, periodically check to make sure that we're using the most recent
 * journal index.
 */
int gfs2_logd(void *data)
{
	struct gfs2_sbd *sdp = data;
	unsigned long t = 1;

	while (!kthread_should_stop()) {
		/* On a withdrawn fs there is nothing to do; idle politely. */
		if (gfs2_withdrawn(sdp)) {
			msleep_interruptible(HZ);
			continue;
		}
		/* Check for errors writing to the journal */
		if (sdp->sd_log_error) {
			gfs2_lm(sdp,
				"GFS2: fsid=%s: error %d: "
				"withdrawing the file system to "
				"prevent further damage.\n",
				sdp->sd_fsname, sdp->sd_log_error);
			gfs2_withdraw(sdp);
			continue;
		}

		/* t == 0 means the previous wait timed out, i.e. it has
		   been gt_logd_secs since the last flush. */
		if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
			gfs2_ail1_empty(sdp, 0);
			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
						  GFS2_LFC_LOGD_JFLUSH_REQD);
		}

		/* Push ail1 writeback and flush when the journal is filling
		   up or a flush was explicitly requested. */
		if (test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
		    gfs2_ail_flush_reqd(sdp)) {
			clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
			gfs2_ail1_start(sdp);
			gfs2_ail1_wait(sdp);
			gfs2_ail1_empty(sdp, 0);
			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
						  GFS2_LFC_LOGD_AIL_FLUSH_REQD);
		}

		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;

		try_to_freeze();

		/* Sleep until work arrives, we're asked to stop, or the
		   periodic-flush timeout expires (t becomes 0 on timeout). */
		t = wait_event_interruptible_timeout(sdp->sd_logd_waitq,
				test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
				gfs2_ail_flush_reqd(sdp) ||
				gfs2_jrnl_flush_reqd(sdp) ||
				kthread_should_stop(),
				t);
	}

	return 0;
}