parent.c 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * Copyright (C) 2017 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <[email protected]>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_trans_resv.h"
  11. #include "xfs_mount.h"
  12. #include "xfs_log_format.h"
  13. #include "xfs_inode.h"
  14. #include "xfs_icache.h"
  15. #include "xfs_dir2.h"
  16. #include "xfs_dir2_priv.h"
  17. #include "scrub/scrub.h"
  18. #include "scrub/common.h"
  /*
   * Setup hook for the parent pointer scrubber.
   *
   * Delegates to xchk_setup_inode_contents() with a second argument of zero
   * and hands that function's return value back to the caller unchanged.
   */
  int
  xchk_setup_parent(
      struct xfs_scrub    *sc)
  {
      int                 error;

      error = xchk_setup_inode_contents(sc, 0);
      return error;
  }
  26. /* Parent pointers */
  27. /* Look for an entry in a parent pointing to this inode. */
  28. struct xchk_parent_ctx {
  29. struct dir_context dc;
  30. struct xfs_scrub *sc;
  31. xfs_ino_t ino;
  32. xfs_nlink_t nlink;
  33. bool cancelled;
  34. };
  35. /* Look for a single entry in a directory pointing to an inode. */
  36. STATIC bool
  37. xchk_parent_actor(
  38. struct dir_context *dc,
  39. const char *name,
  40. int namelen,
  41. loff_t pos,
  42. u64 ino,
  43. unsigned type)
  44. {
  45. struct xchk_parent_ctx *spc;
  46. int error = 0;
  47. spc = container_of(dc, struct xchk_parent_ctx, dc);
  48. if (spc->ino == ino)
  49. spc->nlink++;
  50. /*
  51. * If we're facing a fatal signal, bail out. Store the cancellation
  52. * status separately because the VFS readdir code squashes error codes
  53. * into short directory reads.
  54. */
  55. if (xchk_should_terminate(spc->sc, &error))
  56. spc->cancelled = true;
  57. return !error;
  58. }
  59. /* Count the number of dentries in the parent dir that point to this inode. */
  60. STATIC int
  61. xchk_parent_count_parent_dentries(
  62. struct xfs_scrub *sc,
  63. struct xfs_inode *parent,
  64. xfs_nlink_t *nlink)
  65. {
  66. struct xchk_parent_ctx spc = {
  67. .dc.actor = xchk_parent_actor,
  68. .ino = sc->ip->i_ino,
  69. .sc = sc,
  70. };
  71. size_t bufsize;
  72. loff_t oldpos;
  73. uint lock_mode;
  74. int error = 0;
  75. /*
  76. * If there are any blocks, read-ahead block 0 as we're almost
  77. * certain to have the next operation be a read there. This is
  78. * how we guarantee that the parent's extent map has been loaded,
  79. * if there is one.
  80. */
  81. lock_mode = xfs_ilock_data_map_shared(parent);
  82. if (parent->i_df.if_nextents > 0)
  83. error = xfs_dir3_data_readahead(parent, 0, 0);
  84. xfs_iunlock(parent, lock_mode);
  85. if (error)
  86. return error;
  87. /*
  88. * Iterate the parent dir to confirm that there is
  89. * exactly one entry pointing back to the inode being
  90. * scanned.
  91. */
  92. bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
  93. parent->i_disk_size);
  94. oldpos = 0;
  95. while (true) {
  96. error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize);
  97. if (error)
  98. goto out;
  99. if (spc.cancelled) {
  100. error = -EAGAIN;
  101. goto out;
  102. }
  103. if (oldpos == spc.dc.pos)
  104. break;
  105. oldpos = spc.dc.pos;
  106. }
  107. *nlink = spc.nlink;
  108. out:
  109. return error;
  110. }
  111. /*
  112. * Given the inode number of the alleged parent of the inode being
  113. * scrubbed, try to validate that the parent has exactly one directory
  114. * entry pointing back to the inode being scrubbed.
  115. */
  116. STATIC int
  117. xchk_parent_validate(
  118. struct xfs_scrub *sc,
  119. xfs_ino_t dnum,
  120. bool *try_again)
  121. {
  122. struct xfs_mount *mp = sc->mp;
  123. struct xfs_inode *dp = NULL;
  124. xfs_nlink_t expected_nlink;
  125. xfs_nlink_t nlink;
  126. int error = 0;
  127. *try_again = false;
  128. if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
  129. goto out;
  130. /* '..' must not point to ourselves. */
  131. if (sc->ip->i_ino == dnum) {
  132. xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
  133. goto out;
  134. }
  135. /*
  136. * If we're an unlinked directory, the parent /won't/ have a link
  137. * to us. Otherwise, it should have one link.
  138. */
  139. expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;
  140. /*
  141. * Grab this parent inode. We release the inode before we
  142. * cancel the scrub transaction. Since we're don't know a
  143. * priori that releasing the inode won't trigger eofblocks
  144. * cleanup (which allocates what would be a nested transaction)
  145. * if the parent pointer erroneously points to a file, we
  146. * can't use DONTCACHE here because DONTCACHE inodes can trigger
  147. * immediate inactive cleanup of the inode.
  148. *
  149. * If _iget returns -EINVAL or -ENOENT then the parent inode number is
  150. * garbage and the directory is corrupt. If the _iget returns
  151. * -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt which is a
  152. * cross referencing error. Any other error is an operational error.
  153. */
  154. error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp);
  155. if (error == -EINVAL || error == -ENOENT) {
  156. error = -EFSCORRUPTED;
  157. xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
  158. goto out;
  159. }
  160. if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
  161. goto out;
  162. if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
  163. xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
  164. goto out_rele;
  165. }
  166. /*
  167. * We prefer to keep the inode locked while we lock and search
  168. * its alleged parent for a forward reference. If we can grab
  169. * the iolock, validate the pointers and we're done. We must
  170. * use nowait here to avoid an ABBA deadlock on the parent and
  171. * the child inodes.
  172. */
  173. if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
  174. error = xchk_parent_count_parent_dentries(sc, dp, &nlink);
  175. if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
  176. &error))
  177. goto out_unlock;
  178. if (nlink != expected_nlink)
  179. xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
  180. goto out_unlock;
  181. }
  182. /*
  183. * The game changes if we get here. We failed to lock the parent,
  184. * so we're going to try to verify both pointers while only holding
  185. * one lock so as to avoid deadlocking with something that's actually
  186. * trying to traverse down the directory tree.
  187. */
  188. xfs_iunlock(sc->ip, sc->ilock_flags);
  189. sc->ilock_flags = 0;
  190. error = xchk_ilock_inverted(dp, XFS_IOLOCK_SHARED);
  191. if (error)
  192. goto out_rele;
  193. /* Go looking for our dentry. */
  194. error = xchk_parent_count_parent_dentries(sc, dp, &nlink);
  195. if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
  196. goto out_unlock;
  197. /* Drop the parent lock, relock this inode. */
  198. xfs_iunlock(dp, XFS_IOLOCK_SHARED);
  199. error = xchk_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL);
  200. if (error)
  201. goto out_rele;
  202. sc->ilock_flags = XFS_IOLOCK_EXCL;
  203. /*
  204. * If we're an unlinked directory, the parent /won't/ have a link
  205. * to us. Otherwise, it should have one link. We have to re-set
  206. * it here because we dropped the lock on sc->ip.
  207. */
  208. expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;
  209. /* Look up '..' to see if the inode changed. */
  210. error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
  211. if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
  212. goto out_rele;
  213. /* Drat, parent changed. Try again! */
  214. if (dnum != dp->i_ino) {
  215. xfs_irele(dp);
  216. *try_again = true;
  217. return 0;
  218. }
  219. xfs_irele(dp);
  220. /*
  221. * '..' didn't change, so check that there was only one entry
  222. * for us in the parent.
  223. */
  224. if (nlink != expected_nlink)
  225. xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
  226. return error;
  227. out_unlock:
  228. xfs_iunlock(dp, XFS_IOLOCK_SHARED);
  229. out_rele:
  230. xfs_irele(dp);
  231. out:
  232. return error;
  233. }
  234. /* Scrub a parent pointer. */
  235. int
  236. xchk_parent(
  237. struct xfs_scrub *sc)
  238. {
  239. struct xfs_mount *mp = sc->mp;
  240. xfs_ino_t dnum;
  241. bool try_again;
  242. int tries = 0;
  243. int error = 0;
  244. /*
  245. * If we're a directory, check that the '..' link points up to
  246. * a directory that has one entry pointing to us.
  247. */
  248. if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
  249. return -ENOENT;
  250. /* We're not a special inode, are we? */
  251. if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) {
  252. xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
  253. goto out;
  254. }
  255. /*
  256. * The VFS grabs a read or write lock via i_rwsem before it reads
  257. * or writes to a directory. If we've gotten this far we've
  258. * already obtained IOLOCK_EXCL, which (since 4.10) is the same as
  259. * getting a write lock on i_rwsem. Therefore, it is safe for us
  260. * to drop the ILOCK here in order to do directory lookups.
  261. */
  262. sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
  263. xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
  264. /* Look up '..' */
  265. error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
  266. if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
  267. goto out;
  268. if (!xfs_verify_dir_ino(mp, dnum)) {
  269. xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
  270. goto out;
  271. }
  272. /* Is this the root dir? Then '..' must point to itself. */
  273. if (sc->ip == mp->m_rootip) {
  274. if (sc->ip->i_ino != mp->m_sb.sb_rootino ||
  275. sc->ip->i_ino != dnum)
  276. xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
  277. goto out;
  278. }
  279. do {
  280. error = xchk_parent_validate(sc, dnum, &try_again);
  281. if (error)
  282. goto out;
  283. } while (try_again && ++tries < 20);
  284. /*
  285. * We gave it our best shot but failed, so mark this scrub
  286. * incomplete. Userspace can decide if it wants to try again.
  287. */
  288. if (try_again && tries == 20)
  289. xchk_set_incomplete(sc);
  290. out:
  291. /*
  292. * If we failed to lock the parent inode even after a retry, just mark
  293. * this scrub incomplete and return.
  294. */
  295. if ((sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
  296. error = 0;
  297. xchk_set_incomplete(sc);
  298. }
  299. return error;
  300. }