trans.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
  4. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
  5. */
  6. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  7. #include <linux/sched.h>
  8. #include <linux/slab.h>
  9. #include <linux/spinlock.h>
  10. #include <linux/completion.h>
  11. #include <linux/buffer_head.h>
  12. #include <linux/kallsyms.h>
  13. #include <linux/gfs2_ondisk.h>
  14. #include "gfs2.h"
  15. #include "incore.h"
  16. #include "glock.h"
  17. #include "inode.h"
  18. #include "log.h"
  19. #include "lops.h"
  20. #include "meta_io.h"
  21. #include "trans.h"
  22. #include "util.h"
  23. #include "trace_gfs2.h"
/* Dump a transaction's accounting state for diagnostics (nested begin,
 * or over-use of the reservation detected in gfs2_trans_end()). */
static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr)
{
	/* %pSR resolves the call-site address recorded in tr_ip to a symbol. */
	fs_warn(sdp, "Transaction created at: %pSR\n", (void *)tr->tr_ip);
	fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n",
		tr->tr_blocks, tr->tr_revokes, tr->tr_reserved,
		test_bit(TR_TOUCHED, &tr->tr_flags));
	fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n",
		tr->tr_num_buf_new, tr->tr_num_buf_rm,
		tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
		tr->tr_num_revoke);
}
/**
 * __gfs2_trans_begin - Begin a transaction in caller-provided storage
 * @tr: the transaction to set up (callers in this file zero it first)
 * @sdp: the filesystem
 * @blocks: the number of blocks the caller may journal
 * @revokes: the number of revokes the caller may issue
 * @ip: the caller's address, recorded in tr_ip for diagnostics
 *
 * On success the transaction is attached as current->journal_info and
 * sd_log_flush_lock remains held for reading until gfs2_trans_end().
 *
 * Returns: 0 on success, -EROFS if the journal is not live (at entry or
 * after reserving), or -EINVAL if the reservation exceeds the journal size.
 */
int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
		       unsigned int blocks, unsigned int revokes,
		       unsigned long ip)
{
	unsigned int extra_revokes;

	/* Nesting transactions is a bug: dump the existing one and die. */
	if (current->journal_info) {
		gfs2_print_trans(sdp, current->journal_info);
		BUG();
	}
	BUG_ON(blocks == 0 && revokes == 0);

	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
		return -EROFS;

	tr->tr_ip = ip;
	tr->tr_blocks = blocks;
	tr->tr_revokes = revokes;
	/* Base reservation: always leave room for a minimal log flush. */
	tr->tr_reserved = GFS2_LOG_FLUSH_MIN_BLOCKS;
	if (blocks) {
		/*
		 * The reserved blocks are either used for data or metadata.
		 * We can have mixed data and metadata, each with its own log
		 * descriptor block; see calc_reserved().
		 */
		tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp));
	}
	INIT_LIST_HEAD(&tr->tr_databuf);
	INIT_LIST_HEAD(&tr->tr_buf);
	INIT_LIST_HEAD(&tr->tr_list);
	INIT_LIST_HEAD(&tr->tr_ail1_list);
	INIT_LIST_HEAD(&tr->tr_ail2_list);

	/* A reservation larger than the whole journal can never succeed. */
	if (gfs2_assert_warn(sdp, tr->tr_reserved <= sdp->sd_jdesc->jd_blocks))
		return -EINVAL;

	sb_start_intwrite(sdp->sd_vfs);

	/*
	 * Try the reservations under sd_log_flush_lock to prevent log flushes
	 * from creating inconsistencies between the number of allocated and
	 * reserved revokes. If that fails, do a full-block allocation outside
	 * of the lock to avoid stalling log flushes. Then, allot the
	 * appropriate number of blocks to revokes, use as many revokes locally
	 * as needed, and "release" the surplus into the revokes pool.
	 */
	down_read(&sdp->sd_log_flush_lock);
	if (gfs2_log_try_reserve(sdp, tr, &extra_revokes))
		goto reserved;
	up_read(&sdp->sd_log_flush_lock);
	gfs2_log_reserve(sdp, tr, &extra_revokes);
	down_read(&sdp->sd_log_flush_lock);

reserved:
	/* Hand any surplus revokes back to the pool. */
	gfs2_log_release_revokes(sdp, extra_revokes);
	if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
		/* Journal died while we were reserving: undo everything. */
		gfs2_log_release_revokes(sdp, tr->tr_revokes);
		up_read(&sdp->sd_log_flush_lock);
		gfs2_log_release(sdp, tr->tr_reserved);
		sb_end_intwrite(sdp->sd_vfs);
		return -EROFS;
	}

	current->journal_info = tr;
	return 0;
}
  93. int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
  94. unsigned int revokes)
  95. {
  96. struct gfs2_trans *tr;
  97. int error;
  98. tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS);
  99. if (!tr)
  100. return -ENOMEM;
  101. error = __gfs2_trans_begin(tr, sdp, blocks, revokes, _RET_IP_);
  102. if (error)
  103. kmem_cache_free(gfs2_trans_cachep, tr);
  104. return error;
  105. }
/**
 * gfs2_trans_end - End (commit) the current transaction
 * @sdp: the filesystem
 *
 * Detaches current->journal_info, commits anything journaled, returns
 * unused revoke reservations, and drops the sd_log_flush_lock read lock
 * taken in __gfs2_trans_begin().
 */
void gfs2_trans_end(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr = current->journal_info;
	s64 nbuf;

	current->journal_info = NULL;

	/* Nothing was journaled: give all reservations back and bail out. */
	if (!test_bit(TR_TOUCHED, &tr->tr_flags)) {
		gfs2_log_release_revokes(sdp, tr->tr_revokes);
		up_read(&sdp->sd_log_flush_lock);
		gfs2_log_release(sdp, tr->tr_reserved);
		if (!test_bit(TR_ONSTACK, &tr->tr_flags))
			gfs2_trans_free(sdp, tr);
		sb_end_intwrite(sdp->sd_vfs);
		return;
	}

	/* Return the revokes we reserved but did not use. */
	gfs2_log_release_revokes(sdp, tr->tr_revokes - tr->tr_num_revoke);

	/* Net buffers added must fit in the blocks we reserved. */
	nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new;
	nbuf -= tr->tr_num_buf_rm;
	nbuf -= tr->tr_num_databuf_rm;

	/* Using more buffers or revokes than reserved is a bug; dump state. */
	if (gfs2_assert_withdraw(sdp, nbuf <= tr->tr_blocks) ||
	    gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes))
		gfs2_print_trans(sdp, tr);

	gfs2_log_commit(sdp, tr);
	/* ONSTACK and ATTACHED transactions are freed by their owners. */
	if (!test_bit(TR_ONSTACK, &tr->tr_flags) &&
	    !test_bit(TR_ATTACHED, &tr->tr_flags))
		gfs2_trans_free(sdp, tr);
	up_read(&sdp->sd_log_flush_lock);

	if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS)
		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
			       GFS2_LFC_TRANS_END);
	sb_end_intwrite(sdp->sd_vfs);
}
  137. static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
  138. struct buffer_head *bh)
  139. {
  140. struct gfs2_bufdata *bd;
  141. bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
  142. bd->bd_bh = bh;
  143. bd->bd_gl = gl;
  144. INIT_LIST_HEAD(&bd->bd_list);
  145. INIT_LIST_HEAD(&bd->bd_ail_st_list);
  146. INIT_LIST_HEAD(&bd->bd_ail_gl_list);
  147. bh->b_private = bd;
  148. return bd;
  149. }
/**
 * gfs2_trans_add_data - Add a databuf to the transaction.
 * @gl: The inode glock associated with the buffer
 * @bh: The buffer to add
 *
 * This is used in journaled data mode.
 * We need to journal the data block in the same way as metadata in
 * the functions above. The difference is that here we have a tag
 * which is two __be64's being the block number (as per meta data)
 * and a flag which says whether the data block needs escaping or
 * not. This means we need a new log entry for each 251 or so data
 * blocks, which isn't an enormous overhead but twice as much as
 * for normal metadata blocks.
 */
void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
{
	struct gfs2_trans *tr = current->journal_info;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_bufdata *bd;

	lock_buffer(bh);
	if (buffer_pinned(bh)) {
		/* Already pinned in the log; just mark the transaction dirty. */
		set_bit(TR_TOUCHED, &tr->tr_flags);
		goto out;
	}
	gfs2_log_lock(sdp);
	bd = bh->b_private;
	if (bd == NULL) {
		/*
		 * Allocating a bufdata may sleep, so drop both locks first,
		 * then re-check b_private in case somebody else attached one
		 * while we were unlocked.
		 */
		gfs2_log_unlock(sdp);
		unlock_buffer(bh);
		if (bh->b_private == NULL)
			bd = gfs2_alloc_bufdata(gl, bh);
		else
			bd = bh->b_private;
		lock_buffer(bh);
		gfs2_log_lock(sdp);
	}
	gfs2_assert(sdp, bd->bd_gl == gl);
	set_bit(TR_TOUCHED, &tr->tr_flags);
	if (list_empty(&bd->bd_list)) {
		/* First time in this transaction: pin and queue the buffer. */
		set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
		set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
		gfs2_pin(sdp, bd->bd_bh);
		tr->tr_num_databuf_new++;
		list_add_tail(&bd->bd_list, &tr->tr_databuf);
	}
	gfs2_log_unlock(sdp);
out:
	unlock_buffer(bh);
}
/**
 * gfs2_trans_add_meta - Add a metadata buffer to the transaction
 * @gl: the glock the buffer belongs to
 * @bh: the buffer to add
 *
 * Pins @bh into the log (unless it is already pinned), stamps the journal
 * id into the on-disk metadata header, and queues the buffer on the
 * transaction's tr_buf list.  The block must carry a valid GFS2 magic
 * number; adding an uninitialised block is a BUG().
 */
void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_bufdata *bd;
	struct gfs2_meta_header *mh;
	struct gfs2_trans *tr = current->journal_info;
	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);

	lock_buffer(bh);
	if (buffer_pinned(bh)) {
		/* Already pinned in the log; just mark the transaction dirty. */
		set_bit(TR_TOUCHED, &tr->tr_flags);
		goto out;
	}
	gfs2_log_lock(sdp);
	bd = bh->b_private;
	if (bd == NULL) {
		/*
		 * Allocating a bufdata may sleep, so drop both locks first.
		 * Unlike gfs2_trans_add_data(), take the page lock around the
		 * re-check and allocation, then re-check b_private in case
		 * somebody else attached a bufdata meanwhile.
		 */
		gfs2_log_unlock(sdp);
		unlock_buffer(bh);
		lock_page(bh->b_page);
		if (bh->b_private == NULL)
			bd = gfs2_alloc_bufdata(gl, bh);
		else
			bd = bh->b_private;
		unlock_page(bh->b_page);
		lock_buffer(bh);
		gfs2_log_lock(sdp);
	}
	gfs2_assert(sdp, bd->bd_gl == gl);
	set_bit(TR_TOUCHED, &tr->tr_flags);
	if (!list_empty(&bd->bd_list))
		goto out_unlock;
	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
	mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
	if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
		fs_err(sdp, "Attempting to add uninitialised block to "
		       "journal (inplace block=%lld)\n",
		       (unsigned long long)bd->bd_bh->b_blocknr);
		BUG();
	}
	/* Journaling while frozen or withdrawn indicates a bug; complain. */
	if (unlikely(state == SFS_FROZEN)) {
		fs_info(sdp, "GFS2:adding buf while frozen\n");
		gfs2_assert_withdraw(sdp, 0);
	}
	if (unlikely(gfs2_withdrawn(sdp))) {
		fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n",
			(unsigned long long)bd->bd_bh->b_blocknr);
	}
	gfs2_pin(sdp, bd->bd_bh);
	/* Record which journal dirtied this block. */
	mh->__pad0 = cpu_to_be64(0);
	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
	list_add(&bd->bd_list, &tr->tr_buf);
	tr->tr_num_buf_new++;
out_unlock:
	gfs2_log_unlock(sdp);
out:
	unlock_buffer(bh);
}
/**
 * gfs2_trans_add_revoke - Queue a revoke in the current transaction
 * @sdp: the filesystem
 * @bd: the bufdata to revoke (must not be on any list)
 */
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
	struct gfs2_trans *tr = current->journal_info;

	BUG_ON(!list_empty(&bd->bd_list));
	gfs2_add_revoke(sdp, bd);
	set_bit(TR_TOUCHED, &tr->tr_flags);
	tr->tr_num_revoke++;
}
  264. void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
  265. {
  266. struct gfs2_bufdata *bd, *tmp;
  267. unsigned int n = len;
  268. gfs2_log_lock(sdp);
  269. list_for_each_entry_safe(bd, tmp, &sdp->sd_log_revokes, bd_list) {
  270. if ((bd->bd_blkno >= blkno) && (bd->bd_blkno < (blkno + len))) {
  271. list_del_init(&bd->bd_list);
  272. gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
  273. sdp->sd_log_num_revoke--;
  274. if (bd->bd_gl)
  275. gfs2_glock_remove_revoke(bd->bd_gl);
  276. kmem_cache_free(gfs2_bufdata_cachep, bd);
  277. gfs2_log_release_revokes(sdp, 1);
  278. if (--n == 0)
  279. break;
  280. }
  281. }
  282. gfs2_log_unlock(sdp);
  283. }
  284. void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
  285. {
  286. if (tr == NULL)
  287. return;
  288. gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
  289. gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
  290. gfs2_assert_warn(sdp, list_empty(&tr->tr_databuf));
  291. gfs2_assert_warn(sdp, list_empty(&tr->tr_buf));
  292. kmem_cache_free(gfs2_trans_cachep, tr);
  293. }