super.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bio.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/statfs.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/time.h>
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/kernel.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
#include "util.h"
#include "sys.h"
#include "xattr.h"
#include "lops.h"

enum dinode_demise {
        SHOULD_DELETE_DINODE,
        SHOULD_NOT_DELETE_DINODE,
        SHOULD_DEFER_EVICTION,
};

/**
 * gfs2_jindex_free - Clear all the journal index information
 * @sdp: The GFS2 superblock
 *
 */
void gfs2_jindex_free(struct gfs2_sbd *sdp)
{
        struct list_head list;
        struct gfs2_jdesc *jd;
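
        /*
         * Splice the whole jindex list onto a private list head so the
         * journal descriptors can be torn down after the spinlock is
         * dropped.
         */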
        spin_lock(&sdp->sd_jindex_spin);
        list_add(&list, &sdp->sd_jindex_list);
        list_del_init(&sdp->sd_jindex_list);
        sdp->sd_journals = 0;
        spin_unlock(&sdp->sd_jindex_spin);

        sdp->sd_jdesc = NULL;
        while (!list_empty(&list)) {
                jd = list_first_entry(&list, struct gfs2_jdesc, jd_list);
                gfs2_free_journal_extents(jd);
                list_del(&jd->jd_list);
                iput(jd->jd_inode);
                jd->jd_inode = NULL;
                kfree(jd);
        }
}

static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
{
        struct gfs2_jdesc *jd;

        list_for_each_entry(jd, head, jd_list) {
                if (jd->jd_jid == jid)
                        return jd;
        }
        return NULL;
}

struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
{
        struct gfs2_jdesc *jd;

        spin_lock(&sdp->sd_jindex_spin);
        jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
        spin_unlock(&sdp->sd_jindex_spin);

        return jd;
}

int gfs2_jdesc_check(struct gfs2_jdesc *jd)
{
        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
        u64 size = i_size_read(jd->jd_inode);

        if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, BIT(30)))
                return -EIO;

        jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift;

        if (gfs2_write_alloc_required(ip, 0, size)) {
                gfs2_consist_inode(ip);
                return -EIO;
        }

        return 0;
}

/**
 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
 * @sdp: the filesystem
 *
 * Returns: errno
 */
int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
        struct gfs2_glock *j_gl = ip->i_gl;
        struct gfs2_log_header_host head;
        int error;

        j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
        if (gfs2_withdrawn(sdp))
                return -EIO;

        error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
        if (error) {
                gfs2_consist(sdp);
                return error;
        }
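
        /*
         * A cleanly shut down journal ends with an unmount log header.
         * If that record is missing, the journal was not replayed and
         * the filesystem is inconsistent.
         */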
        if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
                gfs2_consist(sdp);
                return -EIO;
        }

        /* Initialize the log head */
        sdp->sd_log_sequence = head.lh_sequence + 1;
        gfs2_log_pointers_init(sdp, head.lh_blkno);

        error = gfs2_quota_init(sdp);
        if (!error && gfs2_withdrawn(sdp))
                error = -EIO;
        if (!error)
                set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

        return error;
}

void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
{
        const struct gfs2_statfs_change *str = buf;

        sc->sc_total = be64_to_cpu(str->sc_total);
        sc->sc_free = be64_to_cpu(str->sc_free);
        sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
}

void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
{
        struct gfs2_statfs_change *str = buf;

        str->sc_total = cpu_to_be64(sc->sc_total);
        str->sc_free = cpu_to_be64(sc->sc_free);
        str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
}
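
/*
 * Read the master statfs data (and, on non-spectator mounts, this node's
 * local statfs changes) into memory at mount time.
 */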
int gfs2_statfs_init(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
        struct buffer_head *m_bh;
        struct gfs2_holder gh;
        int error;

        error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
                                   &gh);
        if (error)
                return error;

        error = gfs2_meta_inode_buffer(m_ip, &m_bh);
        if (error)
                goto out;

        if (sdp->sd_args.ar_spectator) {
                spin_lock(&sdp->sd_statfs_spin);
                gfs2_statfs_change_in(m_sc, m_bh->b_data +
                                      sizeof(struct gfs2_dinode));
                spin_unlock(&sdp->sd_statfs_spin);
        } else {
                spin_lock(&sdp->sd_statfs_spin);
                gfs2_statfs_change_in(m_sc, m_bh->b_data +
                                      sizeof(struct gfs2_dinode));
                gfs2_statfs_change_in(l_sc, sdp->sd_sc_bh->b_data +
                                      sizeof(struct gfs2_dinode));
                spin_unlock(&sdp->sd_statfs_spin);
        }

        brelse(m_bh);
out:
        gfs2_glock_dq_uninit(&gh);
        return 0;
}

void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
                        s64 dinodes)
{
        struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        s64 x, y;
        int need_sync = 0;

        gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);

        spin_lock(&sdp->sd_statfs_spin);
        l_sc->sc_total += total;
        l_sc->sc_free += free;
        l_sc->sc_dinodes += dinodes;
        gfs2_statfs_change_out(l_sc, sdp->sd_sc_bh->b_data +
                               sizeof(struct gfs2_dinode));
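
        /*
         * If the accumulated local change in free space exceeds
         * ar_statfs_percent percent of the master free-space count,
         * wake quotad so the changes get synced back.
         */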
        if (sdp->sd_args.ar_statfs_percent) {
                x = 100 * l_sc->sc_free;
                y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent;
                if (x >= y || x <= -y)
                        need_sync = 1;
        }
        spin_unlock(&sdp->sd_statfs_spin);

        if (need_sync)
                gfs2_wake_up_statfs(sdp);
}
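
/*
 * Fold this node's local statfs changes into the master statfs file and
 * reset the local changes to zero.  Called from gfs2_statfs_sync() with
 * the master statfs inode glock held and a transaction open.
 */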
void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh)
{
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;

        gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);
        gfs2_trans_add_meta(m_ip->i_gl, m_bh);

        spin_lock(&sdp->sd_statfs_spin);
        m_sc->sc_total += l_sc->sc_total;
        m_sc->sc_free += l_sc->sc_free;
        m_sc->sc_dinodes += l_sc->sc_dinodes;
        memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
        memset(sdp->sd_sc_bh->b_data + sizeof(struct gfs2_dinode),
               0, sizeof(struct gfs2_statfs_change));
        gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
        spin_unlock(&sdp->sd_statfs_spin);
}

int gfs2_statfs_sync(struct super_block *sb, int type)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
        struct gfs2_holder gh;
        struct buffer_head *m_bh;
        int error;

        error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
                                   &gh);
        if (error)
                goto out;

        error = gfs2_meta_inode_buffer(m_ip, &m_bh);
        if (error)
                goto out_unlock;

        spin_lock(&sdp->sd_statfs_spin);
        gfs2_statfs_change_in(m_sc, m_bh->b_data +
                              sizeof(struct gfs2_dinode));
        if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
                spin_unlock(&sdp->sd_statfs_spin);
                goto out_bh;
        }
        spin_unlock(&sdp->sd_statfs_spin);

        error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
        if (error)
                goto out_bh;

        update_statfs(sdp, m_bh);
        sdp->sd_statfs_force_sync = 0;

        gfs2_trans_end(sdp);
out_bh:
        brelse(m_bh);
out_unlock:
        gfs2_glock_dq_uninit(&gh);
out:
        return error;
}

struct lfcc {
        struct list_head list;
        struct gfs2_holder gh;
};

/**
 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
 *                            journals are clean
 * @sdp: the file system
 *
 * Returns: errno
 */
static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *ip;
        struct gfs2_jdesc *jd;
        struct lfcc *lfcc;
        LIST_HEAD(list);
        struct gfs2_log_header_host lh;
        int error;

        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
                lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
                if (!lfcc) {
                        error = -ENOMEM;
                        goto out;
                }
                ip = GFS2_I(jd->jd_inode);
                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh);
                if (error) {
                        kfree(lfcc);
                        goto out;
                }
                list_add(&lfcc->list, &list);
        }
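
        /*
         * With every journal glock held shared, no node can write to its
         * journal.  Take the freeze glock exclusively, then verify that
         * each journal ends with an unmount record, i.e. is clean.
         */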
        error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
                                   LM_FLAG_NOEXP | GL_NOPID,
                                   &sdp->sd_freeze_gh);
        if (error)
                goto out;

        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
                error = gfs2_jdesc_check(jd);
                if (error)
                        break;
                error = gfs2_find_jhead(jd, &lh, false);
                if (error)
                        break;
                if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
                        error = -EBUSY;
                        break;
                }
        }

        if (error)
                gfs2_freeze_unlock(&sdp->sd_freeze_gh);

out:
        while (!list_empty(&list)) {
                lfcc = list_first_entry(&list, struct lfcc, list);
                list_del(&lfcc->list);
                gfs2_glock_dq_uninit(&lfcc->gh);
                kfree(lfcc);
        }
        return error;
}

void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
{
        const struct inode *inode = &ip->i_inode;
        struct gfs2_dinode *str = buf;

        str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
        str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
        str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
        str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
        str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
        str->di_mode = cpu_to_be32(inode->i_mode);
        str->di_uid = cpu_to_be32(i_uid_read(inode));
        str->di_gid = cpu_to_be32(i_gid_read(inode));
        str->di_nlink = cpu_to_be32(inode->i_nlink);
        str->di_size = cpu_to_be64(i_size_read(inode));
        str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode));
        str->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
        str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
        str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);

        str->di_goal_meta = cpu_to_be64(ip->i_goal);
        str->di_goal_data = cpu_to_be64(ip->i_goal);
        str->di_generation = cpu_to_be64(ip->i_generation);

        str->di_flags = cpu_to_be32(ip->i_diskflags);
        str->di_height = cpu_to_be16(ip->i_height);
        str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) &&
                                             !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
                                             GFS2_FORMAT_DE : 0);
        str->di_depth = cpu_to_be16(ip->i_depth);
        str->di_entries = cpu_to_be32(ip->i_entries);

        str->di_eattr = cpu_to_be64(ip->i_eattr);
        str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
        str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
        str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
}

/**
 * gfs2_write_inode - Make sure the inode is stable on the disk
 * @inode: The inode
 * @wbc: The writeback control structure
 *
 * Returns: errno
 */
static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
        struct backing_dev_info *bdi = inode_to_bdi(metamapping->host);
        int ret = 0;
        bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip));
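
        /*
         * Journalled data and WB_SYNC_ALL writeback both require a log
         * flush: the inode (and any journalled data) only becomes stable
         * on disk once the log has been written.
         */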
        if (flush_all)
                gfs2_log_flush(GFS2_SB(inode), ip->i_gl,
                               GFS2_LOG_HEAD_FLUSH_NORMAL |
                               GFS2_LFC_WRITE_INODE);
        if (bdi->wb.dirty_exceeded)
                gfs2_ail1_flush(sdp, wbc);
        else
                filemap_fdatawrite(metamapping);
        if (flush_all)
                ret = filemap_fdatawait(metamapping);
        if (ret)
                mark_inode_dirty_sync(inode);
        else {
                spin_lock(&inode->i_lock);
                if (!(inode->i_flags & I_DIRTY))
                        gfs2_ordered_del_inode(ip);
                spin_unlock(&inode->i_lock);
        }
        return ret;
}

/**
 * gfs2_dirty_inode - check for atime updates
 * @inode: The inode in question
 * @flags: The type of dirty
 *
 * Unfortunately it can be called under any combination of inode
 * glock and transaction lock, so we have to check carefully.
 *
 * At the moment this deals only with atime - it should be possible
 * to expand that role in future, once a review of the locking has
 * been carried out.
 */
static void gfs2_dirty_inode(struct inode *inode, int flags)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct buffer_head *bh;
        struct gfs2_holder gh;
        int need_unlock = 0;
        int need_endtrans = 0;
        int ret;

        if (unlikely(gfs2_withdrawn(sdp)))
                return;
        if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
                ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
                if (ret) {
                        fs_err(sdp, "dirty_inode: glock %d\n", ret);
                        gfs2_dump_glock(NULL, ip->i_gl, true);
                        return;
                }
                need_unlock = 1;
        } else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
                return;

        if (current->journal_info == NULL) {
                ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
                if (ret) {
                        fs_err(sdp, "dirty_inode: gfs2_trans_begin %d\n", ret);
                        goto out;
                }
                need_endtrans = 1;
        }

        ret = gfs2_meta_inode_buffer(ip, &bh);
        if (ret == 0) {
                gfs2_trans_add_meta(ip->i_gl, bh);
                gfs2_dinode_out(ip, bh->b_data);
                brelse(bh);
        }

        if (need_endtrans)
                gfs2_trans_end(sdp);
out:
        if (need_unlock)
                gfs2_glock_dq_uninit(&gh);
}

/**
 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
 * @sdp: the filesystem
 *
 */
void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
        int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

        gfs2_flush_delete_work(sdp);
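
        /*
         * Stop the quotad and logd daemons.  If one of them is the
         * thread that is withdrawing the filesystem, it cannot stop
         * itself, so only warn in that case.
         */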
        if (!log_write_allowed && current == sdp->sd_quotad_process)
                fs_warn(sdp, "The quotad daemon is withdrawing.\n");
        else if (sdp->sd_quotad_process)
                kthread_stop(sdp->sd_quotad_process);
        sdp->sd_quotad_process = NULL;

        if (!log_write_allowed && current == sdp->sd_logd_process)
                fs_warn(sdp, "The logd daemon is withdrawing.\n");
        else if (sdp->sd_logd_process)
                kthread_stop(sdp->sd_logd_process);
        sdp->sd_logd_process = NULL;

        if (log_write_allowed) {
                gfs2_quota_sync(sdp->sd_vfs, 0);
                gfs2_statfs_sync(sdp->sd_vfs, 0);

                gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
                               GFS2_LFC_MAKE_FS_RO);
                wait_event_timeout(sdp->sd_log_waitq,
                                   gfs2_log_is_empty(sdp),
                                   HZ * 5);
                gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp));
        } else {
                wait_event_timeout(sdp->sd_log_waitq,
                                   gfs2_log_is_empty(sdp),
                                   HZ * 5);
        }
        gfs2_quota_cleanup(sdp);

        if (!log_write_allowed)
                sdp->sd_vfs->s_flags |= SB_RDONLY;
}

/**
 * gfs2_put_super - Unmount the filesystem
 * @sb: The VFS superblock
 *
 */
static void gfs2_put_super(struct super_block *sb)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_jdesc *jd;

        /* No more recovery requests */
        set_bit(SDF_NORECOVERY, &sdp->sd_flags);
        smp_mb();

        /* Wait on outstanding recovery */
restart:
        spin_lock(&sdp->sd_jindex_spin);
        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
                if (!test_bit(JDF_RECOVERY, &jd->jd_flags))
                        continue;
                spin_unlock(&sdp->sd_jindex_spin);
                wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
                            TASK_UNINTERRUPTIBLE);
                goto restart;
        }
        spin_unlock(&sdp->sd_jindex_spin);

        if (!sb_rdonly(sb)) {
                gfs2_make_fs_ro(sdp);
        }
        WARN_ON(gfs2_withdrawing(sdp));

        /* At this point, we're through modifying the disk */

        /* Release stuff */

        iput(sdp->sd_jindex);
        iput(sdp->sd_statfs_inode);
        iput(sdp->sd_rindex);
        iput(sdp->sd_quota_inode);

        gfs2_glock_put(sdp->sd_rename_gl);
        gfs2_glock_put(sdp->sd_freeze_gl);

        if (!sdp->sd_args.ar_spectator) {
                if (gfs2_holder_initialized(&sdp->sd_journal_gh))
                        gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
                if (gfs2_holder_initialized(&sdp->sd_jinode_gh))
                        gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
                brelse(sdp->sd_sc_bh);
                gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
                gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
                free_local_statfs_inodes(sdp);
                iput(sdp->sd_qc_inode);
        }

        gfs2_glock_dq_uninit(&sdp->sd_live_gh);
        gfs2_clear_rgrpd(sdp);
        gfs2_jindex_free(sdp);

        /* Take apart glock structures and buffer lists */
        gfs2_gl_hash_clear(sdp);
        truncate_inode_pages_final(&sdp->sd_aspace);
        gfs2_delete_debugfs_file(sdp);

        /* Unmount the locking protocol */
        gfs2_lm_unmount(sdp);

        /* At this point, we're through participating in the lockspace */
        gfs2_sys_fs_del(sdp);
        free_sbd(sdp);
}

/**
 * gfs2_sync_fs - sync the filesystem
 * @sb: the superblock
 * @wait: true to wait for completion
 *
 * Flushes the log to disk.
 */
static int gfs2_sync_fs(struct super_block *sb, int wait)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;

        gfs2_quota_sync(sb, -1);
        if (wait)
                gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
                               GFS2_LFC_SYNC_FS);
        return sdp->sd_log_error;
}
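
/*
 * Work function behind sd_freeze_work: it blocks until the freeze glock
 * can be reacquired and then thaws the locally frozen filesystem.
 */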
void gfs2_freeze_func(struct work_struct *work)
{
        int error;
        struct gfs2_holder freeze_gh;
        struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
        struct super_block *sb = sdp->sd_vfs;

        atomic_inc(&sb->s_active);
        error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
        if (error) {
                gfs2_assert_withdraw(sdp, 0);
        } else {
                atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
                error = thaw_super(sb);
                if (error) {
                        fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n",
                                error);
                        gfs2_assert_withdraw(sdp, 0);
                }
                gfs2_freeze_unlock(&freeze_gh);
        }
        deactivate_super(sb);
        clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags);
        wake_up_bit(&sdp->sd_flags, SDF_FS_FROZEN);
        return;
}

/**
 * gfs2_freeze - prevent further writes to the filesystem
 * @sb: the VFS structure for the filesystem
 *
 */
static int gfs2_freeze(struct super_block *sb)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;
        int error;

        mutex_lock(&sdp->sd_freeze_mutex);
        if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) {
                error = -EBUSY;
                goto out;
        }

        for (;;) {
                if (gfs2_withdrawn(sdp)) {
                        error = -EINVAL;
                        goto out;
                }

                error = gfs2_lock_fs_check_clean(sdp);
                if (!error)
                        break;

                if (error == -EBUSY)
                        fs_err(sdp, "waiting for recovery before freeze\n");
                else if (error == -EIO) {
                        fs_err(sdp, "Fatal IO error: cannot freeze gfs2 due "
                               "to recovery error.\n");
                        goto out;
                } else {
                        fs_err(sdp, "error freezing FS: %d\n", error);
                }
                fs_err(sdp, "retrying...\n");
                msleep(1000);
        }
        set_bit(SDF_FS_FROZEN, &sdp->sd_flags);
out:
        mutex_unlock(&sdp->sd_freeze_mutex);
        return error;
}

/**
 * gfs2_unfreeze - reallow writes to the filesystem
 * @sb: the VFS structure for the filesystem
 *
 */
static int gfs2_unfreeze(struct super_block *sb)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;

        mutex_lock(&sdp->sd_freeze_mutex);
        if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
            !gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
                mutex_unlock(&sdp->sd_freeze_mutex);
                return -EINVAL;
        }

        gfs2_freeze_unlock(&sdp->sd_freeze_gh);
        mutex_unlock(&sdp->sd_freeze_mutex);
        return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE);
}

/**
 * statfs_slow_fill - fill in the sc for a given RG
 * @rgd: the RG
 * @sc: the sc structure
 *
 * Returns: 0
 */
static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
                            struct gfs2_statfs_change_host *sc)
{
        gfs2_rgrp_verify(rgd);
        sc->sc_total += rgd->rd_data;
        sc->sc_free += rgd->rd_free;
        sc->sc_dinodes += rgd->rd_dinodes;
        return 0;
}

/**
 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
 * @sdp: the filesystem
 * @sc: the sc info that will be returned
 *
 * Any error (other than a signal) will cause this routine to fall back
 * to the synchronous version.
 *
 * FIXME: This really shouldn't busy wait like this.
 *
 * Returns: errno
 */
static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
        struct gfs2_rgrpd *rgd_next;
        struct gfs2_holder *gha, *gh;
        unsigned int slots = 64;
        unsigned int x;
        int done;
        int error = 0, err;

        memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
        gha = kmalloc_array(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
        if (!gha)
                return -ENOMEM;
        for (x = 0; x < slots; x++)
                gfs2_holder_mark_uninitialized(gha + x);

        rgd_next = gfs2_rgrpd_get_first(sdp);
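
        /*
         * Walk all resource groups, keeping up to 'slots' asynchronous
         * glock requests in flight at once.  Completed requests are
         * harvested and their slots refilled until every rgrp has been
         * counted.
         */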
        for (;;) {
                done = 1;

                for (x = 0; x < slots; x++) {
                        gh = gha + x;

                        if (gfs2_holder_initialized(gh) && gfs2_glock_poll(gh)) {
                                err = gfs2_glock_wait(gh);
                                if (err) {
                                        gfs2_holder_uninit(gh);
                                        error = err;
                                } else {
                                        if (!error) {
                                                struct gfs2_rgrpd *rgd =
                                                        gfs2_glock2rgrp(gh->gh_gl);

                                                error = statfs_slow_fill(rgd, sc);
                                        }
                                        gfs2_glock_dq_uninit(gh);
                                }
                        }

                        if (gfs2_holder_initialized(gh))
                                done = 0;
                        else if (rgd_next && !error) {
                                error = gfs2_glock_nq_init(rgd_next->rd_gl,
                                                           LM_ST_SHARED,
                                                           GL_ASYNC,
                                                           gh);
                                rgd_next = gfs2_rgrpd_get_next(rgd_next);
                                done = 0;
                        }

                        if (signal_pending(current))
                                error = -ERESTARTSYS;
                }

                if (done)
                        break;

                yield();
        }

        kfree(gha);
        return error;
}

/**
 * gfs2_statfs_i - Do a statfs
 * @sdp: the filesystem
 * @sc: the sc structure
 *
 * Returns: errno
 */
static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;

        spin_lock(&sdp->sd_statfs_spin);

        *sc = *m_sc;
        sc->sc_total += l_sc->sc_total;
        sc->sc_free += l_sc->sc_free;
        sc->sc_dinodes += l_sc->sc_dinodes;

        spin_unlock(&sdp->sd_statfs_spin);
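
        /*
         * The local changes are folded into the master copy lazily, so
         * the combined counts can transiently go out of range; clamp
         * them to sane values before reporting.
         */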
        if (sc->sc_free < 0)
                sc->sc_free = 0;
        if (sc->sc_free > sc->sc_total)
                sc->sc_free = sc->sc_total;
        if (sc->sc_dinodes < 0)
                sc->sc_dinodes = 0;

        return 0;
}

/**
 * gfs2_statfs - Gather and return stats about the filesystem
 * @dentry: The name of the link
 * @buf: The buffer
 *
 * Returns: 0 on success or error code
 */
static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
{
        struct super_block *sb = dentry->d_sb;
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_statfs_change_host sc;
        int error;

        error = gfs2_rindex_update(sdp);
        if (error)
                return error;

        if (gfs2_tune_get(sdp, gt_statfs_slow))
                error = gfs2_statfs_slow(sdp, &sc);
        else
                error = gfs2_statfs_i(sdp, &sc);

        if (error)
                return error;

        buf->f_type = GFS2_MAGIC;
        buf->f_bsize = sdp->sd_sb.sb_bsize;
        buf->f_blocks = sc.sc_total;
        buf->f_bfree = sc.sc_free;
        buf->f_bavail = sc.sc_free;
        buf->f_files = sc.sc_dinodes + sc.sc_free;
        buf->f_ffree = sc.sc_free;
        buf->f_namelen = GFS2_FNAMESIZE;

        return 0;
}

/**
 * gfs2_drop_inode - Drop an inode (test for remote unlink)
 * @inode: The inode to drop
 *
 * If we've received a callback on an iopen lock then it's because a
 * remote node tried to deallocate the inode but failed due to this node
 * still having the inode open. Here we mark the link count zero
 * since we know that it must have reached zero if the GLF_DEMOTE flag
 * is set on the iopen glock. If we didn't do a disk read since the
 * remote node removed the final link then we might otherwise miss
 * this event. This check ensures that this node will deallocate the
 * inode's blocks, or alternatively pass the baton on to another
 * node for later deallocation.
 */
static int gfs2_drop_inode(struct inode *inode)
{
        struct gfs2_inode *ip = GFS2_I(inode);

        if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) &&
            inode->i_nlink &&
            gfs2_holder_initialized(&ip->i_iopen_gh)) {
                struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
                if (test_bit(GLF_DEMOTE, &gl->gl_flags))
                        clear_nlink(inode);
        }

        /*
         * If, under memory pressure, an inode's link count has dropped to
         * zero, defer deleting the inode to the delete workqueue. This avoids
         * calling into DLM under memory pressure, which can deadlock.
         */
        if (!inode->i_nlink &&
            unlikely(current->flags & PF_MEMALLOC) &&
            gfs2_holder_initialized(&ip->i_iopen_gh)) {
                struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;

                gfs2_glock_hold(gl);
                if (!gfs2_queue_delete_work(gl, 0))
                        gfs2_glock_queue_put(gl);
                return 0;
        }

        return generic_drop_inode(inode);
}

static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
{
        do {
                if (d1 == d2)
                        return 1;
                d1 = d1->d_parent;
        } while (!IS_ROOT(d1));
        return 0;
}

/**
 * gfs2_show_options - Show mount options for /proc/mounts
 * @s: seq_file structure
 * @root: root of this (sub)tree
 *
 * Returns: 0 on success or error code
 */
static int gfs2_show_options(struct seq_file *s, struct dentry *root)
{
        struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
        struct gfs2_args *args = &sdp->sd_args;
        unsigned int logd_secs, statfs_slow, statfs_quantum, quota_quantum;

        spin_lock(&sdp->sd_tune.gt_spin);
        logd_secs = sdp->sd_tune.gt_logd_secs;
        quota_quantum = sdp->sd_tune.gt_quota_quantum;
        statfs_quantum = sdp->sd_tune.gt_statfs_quantum;
        statfs_slow = sdp->sd_tune.gt_statfs_slow;
        spin_unlock(&sdp->sd_tune.gt_spin);

        if (is_ancestor(root, sdp->sd_master_dir))
                seq_puts(s, ",meta");
        if (args->ar_lockproto[0])
                seq_show_option(s, "lockproto", args->ar_lockproto);
        if (args->ar_locktable[0])
                seq_show_option(s, "locktable", args->ar_locktable);
        if (args->ar_hostdata[0])
                seq_show_option(s, "hostdata", args->ar_hostdata);
        if (args->ar_spectator)
                seq_puts(s, ",spectator");
        if (args->ar_localflocks)
                seq_puts(s, ",localflocks");
        if (args->ar_debug)
                seq_puts(s, ",debug");
        if (args->ar_posix_acl)
                seq_puts(s, ",acl");
        if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
                char *state;

                switch (args->ar_quota) {
                case GFS2_QUOTA_OFF:
                        state = "off";
                        break;
                case GFS2_QUOTA_ACCOUNT:
                        state = "account";
                        break;
                case GFS2_QUOTA_ON:
                        state = "on";
                        break;
                default:
                        state = "unknown";
                        break;
                }
                seq_printf(s, ",quota=%s", state);
        }
        if (args->ar_suiddir)
                seq_puts(s, ",suiddir");
        if (args->ar_data != GFS2_DATA_DEFAULT) {
                char *state;

                switch (args->ar_data) {
                case GFS2_DATA_WRITEBACK:
                        state = "writeback";
                        break;
                case GFS2_DATA_ORDERED:
                        state = "ordered";
                        break;
                default:
                        state = "unknown";
                        break;
                }
                seq_printf(s, ",data=%s", state);
        }
        if (args->ar_discard)
                seq_puts(s, ",discard");
        if (logd_secs != 30)
                seq_printf(s, ",commit=%d", logd_secs);
        if (statfs_quantum != 30)
                seq_printf(s, ",statfs_quantum=%d", statfs_quantum);
        else if (statfs_slow)
                seq_puts(s, ",statfs_quantum=0");
        if (quota_quantum != 60)
                seq_printf(s, ",quota_quantum=%d", quota_quantum);
        if (args->ar_statfs_percent)
                seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
        if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
                const char *state;

                switch (args->ar_errors) {
                case GFS2_ERRORS_WITHDRAW:
                        state = "withdraw";
                        break;
                case GFS2_ERRORS_PANIC:
                        state = "panic";
                        break;
                default:
                        state = "unknown";
                        break;
                }
                seq_printf(s, ",errors=%s", state);
        }
        if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
                seq_puts(s, ",nobarrier");
        if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
                seq_puts(s, ",demote_interface_used");
        if (args->ar_rgrplvb)
                seq_puts(s, ",rgrplvb");
        if (args->ar_loccookie)
                seq_puts(s, ",loccookie");
        return 0;
}

static void gfs2_final_release_pages(struct gfs2_inode *ip)
{
        struct inode *inode = &ip->i_inode;
        struct gfs2_glock *gl = ip->i_gl;

        truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
        truncate_inode_pages(&inode->i_data, 0);

        if (atomic_read(&gl->gl_revokes) == 0) {
                clear_bit(GLF_LFLUSH, &gl->gl_flags);
                clear_bit(GLF_DIRTY, &gl->gl_flags);
        }
}

static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *rgd;
        struct gfs2_holder gh;
        int error;

        if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
                gfs2_consist_inode(ip);
                return -EIO;
        }

        error = gfs2_rindex_update(sdp);
        if (error)
                return error;

        error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
        if (error)
                return error;
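
        /*
         * Find and lock the resource group that contains this dinode so
         * its bitmap can be updated when the dinode is freed.
         */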
        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
        if (!rgd) {
                gfs2_consist_inode(ip);
                error = -EIO;
                goto out_qs;
        }

        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
                                   LM_FLAG_NODE_SCOPE, &gh);
        if (error)
                goto out_qs;

        error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
                                 sdp->sd_jdesc->jd_blocks);
        if (error)
                goto out_rg_gunlock;

        gfs2_free_di(rgd, ip);

        gfs2_final_release_pages(ip);

        gfs2_trans_end(sdp);

out_rg_gunlock:
        gfs2_glock_dq_uninit(&gh);
out_qs:
        gfs2_quota_unhold(ip);
        return error;
}

/**
 * gfs2_glock_put_eventually
 * @gl: The glock to put
 *
 * When under memory pressure, trigger a deferred glock put to make sure we
 * won't call into DLM and deadlock. Otherwise, put the glock directly.
 */
static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
{
        if (current->flags & PF_MEMALLOC)
                gfs2_glock_queue_put(gl);
        else
                gfs2_glock_put(gl);
}

static bool gfs2_upgrade_iopen_glock(struct inode *inode)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_holder *gh = &ip->i_iopen_gh;
        long timeout = 5 * HZ;
        int error;

        gh->gh_flags |= GL_NOCACHE;
        gfs2_glock_dq_wait(gh);

        /*
         * If there are no other lock holders, we'll get the lock immediately.
         * Otherwise, the other nodes holding the lock will be notified about
         * our locking request. If they don't have the inode open, they'll
         * evict the cached inode and release the lock. Otherwise, if they
         * poke the inode glock, we'll take this as an indication that they
         * still need the iopen glock and that they'll take care of deleting
         * the inode when they're done. As a last resort, if another node
         * keeps holding the iopen glock without showing any activity on the
         * inode glock, we'll eventually time out.
         *
         * Note that we're passing the LM_FLAG_TRY_1CB flag to the first
         * locking request as an optimization to notify lock holders as soon as
         * possible. Without that flag, they'd be notified implicitly by the
         * second locking request.
         */

        gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh);
        error = gfs2_glock_nq(gh);
        if (error != GLR_TRYFAILED)
                return !error;

        gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh);
        error = gfs2_glock_nq(gh);
        if (error)
                return false;

        timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
                !test_bit(HIF_WAIT, &gh->gh_iflags) ||
                test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags),
                timeout);
        if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
                gfs2_glock_dq(gh);
                return false;
        }
        return gfs2_glock_holder_ready(gh) == 0;
}

/**
 * evict_should_delete - determine whether the inode is eligible for deletion
 * @inode: The inode to evict
 * @gh: The glock holder structure
 *
 * This function determines whether the evicted inode is eligible to be deleted
 * and locks the inode glock.
 *
 * Returns: the fate of the dinode
 */
static enum dinode_demise evict_should_delete(struct inode *inode,
                                              struct gfs2_holder *gh)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct super_block *sb = inode->i_sb;
        struct gfs2_sbd *sdp = sb->s_fs_info;
        int ret;

        if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
                BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
                goto should_delete;
        }

        if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
                return SHOULD_DEFER_EVICTION;

        /* Deletes should never happen under memory pressure anymore. */
        if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
                return SHOULD_DEFER_EVICTION;

        /* Must not read inode block until block type has been verified */
        ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh);
        if (unlikely(ret)) {
                glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
                ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
                gfs2_glock_dq_uninit(&ip->i_iopen_gh);
                return SHOULD_DEFER_EVICTION;
        }

        if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
                return SHOULD_NOT_DELETE_DINODE;
        ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
        if (ret)
                return SHOULD_NOT_DELETE_DINODE;

        ret = gfs2_instantiate(gh);
        if (ret)
                return SHOULD_NOT_DELETE_DINODE;

        /*
         * The inode may have been recreated in the meantime.
         */
        if (inode->i_nlink)
                return SHOULD_NOT_DELETE_DINODE;

should_delete:
        if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
            test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
                if (!gfs2_upgrade_iopen_glock(inode)) {
                        gfs2_holder_uninit(&ip->i_iopen_gh);
                        return SHOULD_NOT_DELETE_DINODE;
                }
        }
        return SHOULD_DELETE_DINODE;
}

/**
 * evict_unlinked_inode - delete the pieces of an unlinked evicted inode
 * @inode: The inode to evict
 */
static int evict_unlinked_inode(struct inode *inode)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        int ret;

        if (S_ISDIR(inode->i_mode) &&
            (ip->i_diskflags & GFS2_DIF_EXHASH)) {
                ret = gfs2_dir_exhash_dealloc(ip);
                if (ret)
                        goto out;
        }

        if (ip->i_eattr) {
                ret = gfs2_ea_dealloc(ip);
                if (ret)
                        goto out;
        }

        if (!gfs2_is_stuffed(ip)) {
                ret = gfs2_file_dealloc(ip);
                if (ret)
                        goto out;
        }

        /*
         * We're about to clear the bitmap for the dinode, but as soon as we
         * do, gfs2_create_inode can create another inode at the same block
         * location and try to set gl_object again. We clear gl_object here so
         * that subsequent inode creates don't see an old gl_object.
         */
        glock_clear_object(ip->i_gl, ip);
        ret = gfs2_dinode_dealloc(ip);
        gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
out:
        return ret;
}

/*
 * evict_linked_inode - evict an inode whose dinode has not been unlinked
 * @inode: The inode to evict
 */
static int evict_linked_inode(struct inode *inode)
{
        struct super_block *sb = inode->i_sb;
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct address_space *metamapping;
        int ret;

        gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
                       GFS2_LFC_EVICT_INODE);
        metamapping = gfs2_glock2aspace(ip->i_gl);
        if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
                filemap_fdatawrite(metamapping);
                filemap_fdatawait(metamapping);
        }
        write_inode_now(inode, 1);
        gfs2_ail_flush(ip->i_gl, 0);

        ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
        if (ret)
                return ret;

        /* Needs to be done before glock release & also in a transaction */
        truncate_inode_pages(&inode->i_data, 0);
        truncate_inode_pages(metamapping, 0);
        gfs2_trans_end(sdp);
        return 0;
}

/**
 * gfs2_evict_inode - Remove an inode from cache
 * @inode: The inode to evict
 *
 * There are three cases to consider:
 * 1. i_nlink == 0, we are the final opener (and must deallocate)
 * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
 * 3. i_nlink > 0
 *
 * If the fs is read only, then we have to treat all cases as per #3
 * since we are unable to do any deallocation. The inode will be
 * deallocated by the next read/write node to attempt an allocation
 * in the same resource group.
 *
 * We have to (at the moment) hold the inode's main lock to cover
 * the gap between unlocking the shared lock on the iopen lock and
 * taking the exclusive lock. I'd rather do a shared -> exclusive
 * conversion on the iopen lock, but we can change that later. This
 * is safe, just less efficient.
 */
static void gfs2_evict_inode(struct inode *inode)
{
        struct super_block *sb = inode->i_sb;
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int ret;

        if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
                clear_inode(inode);
                return;
        }

        if (inode->i_nlink || sb_rdonly(sb))
                goto out;

        /*
         * In case of an incomplete mount, gfs2_evict_inode() may be called for
         * system files without having an active journal to write to. In that
         * case, skip the filesystem evict.
         */
        if (!sdp->sd_jdesc)
                goto out;

        gfs2_holder_mark_uninitialized(&gh);
        ret = evict_should_delete(inode, &gh);
        if (ret == SHOULD_DEFER_EVICTION)
                goto out;
        if (ret == SHOULD_DELETE_DINODE)
                ret = evict_unlinked_inode(inode);
        else
                ret = evict_linked_inode(inode);

        if (gfs2_rs_active(&ip->i_res))
                gfs2_rs_deltree(&ip->i_res);

        if (gfs2_holder_initialized(&gh)) {
                glock_clear_object(ip->i_gl, ip);
                gfs2_glock_dq_uninit(&gh);
        }
        if (ret && ret != GLR_TRYFAILED && ret != -EROFS)
                fs_warn(sdp, "gfs2_evict_inode: %d\n", ret);
out:
        truncate_inode_pages_final(&inode->i_data);
        if (ip->i_qadata)
                gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
        gfs2_rs_deltree(&ip->i_res);
        gfs2_ordered_del_inode(ip);
        clear_inode(inode);
        gfs2_dir_hash_inval(ip);
        if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
                struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;

                glock_clear_object(gl, ip);
                if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
                        ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
                        gfs2_glock_dq(&ip->i_iopen_gh);
                }
                gfs2_glock_hold(gl);
                gfs2_holder_uninit(&ip->i_iopen_gh);
                gfs2_glock_put_eventually(gl);
        }
        if (ip->i_gl) {
                glock_clear_object(ip->i_gl, ip);
                wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
                gfs2_glock_add_to_lru(ip->i_gl);
                gfs2_glock_put_eventually(ip->i_gl);
                rcu_assign_pointer(ip->i_gl, NULL);
        }
}

static struct inode *gfs2_alloc_inode(struct super_block *sb)
{
        struct gfs2_inode *ip;

        ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL);
        if (!ip)
                return NULL;
        ip->i_flags = 0;
        ip->i_gl = NULL;
        gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
        memset(&ip->i_res, 0, sizeof(ip->i_res));
        RB_CLEAR_NODE(&ip->i_res.rs_node);
        ip->i_rahead = 0;
        return &ip->i_inode;
}

static void gfs2_free_inode(struct inode *inode)
{
        kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
}

void free_local_statfs_inodes(struct gfs2_sbd *sdp)
{
        struct local_statfs_inode *lsi, *safe;

        /* Run through the statfs inodes list to iput and free memory */
        list_for_each_entry_safe(lsi, safe, &sdp->sd_sc_inodes_list, si_list) {
                if (lsi->si_jid == sdp->sd_jdesc->jd_jid)
                        sdp->sd_sc_inode = NULL; /* belongs to this node */
                if (lsi->si_sc_inode)
                        iput(lsi->si_sc_inode);
                list_del(&lsi->si_list);
                kfree(lsi);
        }
}

struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
                                      unsigned int index)
{
        struct local_statfs_inode *lsi;

        /*
         * Return the local (per node) statfs inode in the
         * sdp->sd_sc_inodes_list corresponding to the 'index'.
         */
        list_for_each_entry(lsi, &sdp->sd_sc_inodes_list, si_list) {
                if (lsi->si_jid == index)
                        return lsi->si_sc_inode;
        }
        return NULL;
}

const struct super_operations gfs2_super_ops = {
        .alloc_inode            = gfs2_alloc_inode,
        .free_inode             = gfs2_free_inode,
        .write_inode            = gfs2_write_inode,
        .dirty_inode            = gfs2_dirty_inode,
        .evict_inode            = gfs2_evict_inode,
        .put_super              = gfs2_put_super,
        .sync_fs                = gfs2_sync_fs,
        .freeze_super           = gfs2_freeze,
        .thaw_super             = gfs2_unfreeze,
        .statfs                 = gfs2_statfs,
        .drop_inode             = gfs2_drop_inode,
        .show_options           = gfs2_show_options,
};