  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * Copyright (C) 2017 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <[email protected]>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_trans_resv.h"
  11. #include "xfs_mount.h"
  12. #include "xfs_btree.h"
  13. #include "xfs_log_format.h"
  14. #include "xfs_trans.h"
  15. #include "xfs_inode.h"
  16. #include "xfs_icache.h"
  17. #include "xfs_alloc.h"
  18. #include "xfs_alloc_btree.h"
  19. #include "xfs_ialloc.h"
  20. #include "xfs_ialloc_btree.h"
  21. #include "xfs_refcount_btree.h"
  22. #include "xfs_rmap.h"
  23. #include "xfs_rmap_btree.h"
  24. #include "xfs_log.h"
  25. #include "xfs_trans_priv.h"
  26. #include "xfs_da_format.h"
  27. #include "xfs_da_btree.h"
  28. #include "xfs_attr.h"
  29. #include "xfs_reflink.h"
  30. #include "xfs_ag.h"
  31. #include "scrub/scrub.h"
  32. #include "scrub/common.h"
  33. #include "scrub/trace.h"
  34. #include "scrub/repair.h"
  35. #include "scrub/health.h"
  36. /* Common code for the metadata scrubbers. */
  37. /*
  38. * Handling operational errors.
  39. *
  40. * The *_process_error() family of functions are used to process error return
  41. * codes from functions called as part of a scrub operation.
  42. *
  43. * If there's no error, we return true to tell the caller that it's ok
  44. * to move on to the next check in its list.
  45. *
  46. * For non-verifier errors (e.g. ENOMEM) we return false to tell the
  47. * caller that something bad happened, and we preserve *error so that
  48. * the caller can return the *error up the stack to userspace.
  49. *
  50. * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
  51. * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words,
  52. * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
  53. * not via return codes. We return false to tell the caller that
  54. * something bad happened. Since the error has been cleared, the caller
  55. * will (presumably) return that zero and scrubbing will move on to
  56. * whatever's next.
  57. *
  58. * ftrace can be used to record the precise metadata location and the
  59. * approximate code location of the failed operation.
  60. */
/*
 * Check for operational errors.
 *
 * Returns true only when *error is zero, meaning the caller may move on
 * to its next check.  Verifier failures (-EFSBADCRC/-EFSCORRUPTED) are
 * folded into sm_flags via @errflag and *error is cleared; any other
 * failure is traced and left in *error for the caller to propagate.
 * @ret_ip records the caller's code location for ftrace.
 */
static bool
__xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(
				sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
				sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		fallthrough;
	default:
		/* Trace the failure, including the now-zeroed verifier errors. */
		trace_xchk_op_error(sc, agno, bno, *error,
				ret_ip);
		break;
	}
	return false;
}
/* Process an operational error; verifier failures become OFLAG_CORRUPT. */
bool
xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{
	return __xchk_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}
/* Process a cross-referencing error; failures become OFLAG_XFAIL. */
bool
xchk_xref_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{
	return __xchk_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}
/*
 * Check for operational errors for a file offset.
 *
 * Same semantics as __xchk_process_error, except that the trace data
 * identifies a (fork, file offset) pair instead of an AG block.
 */
static bool
__xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		fallthrough;
	default:
		/* Trace the failure, including the now-zeroed verifier errors. */
		trace_xchk_file_op_error(sc, whichfork, offset, *error,
				ret_ip);
		break;
	}
	return false;
}
/* Process a fork-block error; verifier failures become OFLAG_CORRUPT. */
bool
xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}
/* Process a fork-block cross-referencing error; failures become OFLAG_XFAIL. */
bool
xchk_fblock_xref_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}
  163. /*
  164. * Handling scrub corruption/optimization/warning checks.
  165. *
  166. * The *_set_{corrupt,preen,warning}() family of functions are used to
  167. * record the presence of metadata that is incorrect (corrupt), could be
  168. * optimized somehow (preen), or should be flagged for administrative
  169. * review but is not incorrect (warn).
  170. *
  171. * ftrace can be used to record the precise metadata location and
  172. * approximate code location of the failed check.
  173. */
/* Record a block which could be optimized. */
void
xchk_block_set_preen(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	/* Preen means the metadata is valid but could be improved. */
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xchk_block_preen(sc, xfs_buf_daddr(bp), __return_address);
}
/*
 * Record an inode which could be optimized.  The trace data records
 * the inode number and the caller's code location.
 */
void
xchk_ino_set_preen(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xchk_ino_preen(sc, ino, __return_address);
}
/* Record something being wrong with the filesystem primary superblock. */
void
xchk_set_corrupt(
	struct xfs_scrub	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_fs_error(sc, 0, __return_address);
}
/* Record a corrupt block. */
void
xchk_block_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
}
/* Record a corruption while cross-referencing. */
void
xchk_block_xref_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	/* XCORRUPT: the cross-referenced structure disagrees, not this one. */
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
}
/*
 * Record a corrupt inode.  The trace data records the inode number
 * and the caller's code location.
 */
void
xchk_ino_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_ino_error(sc, ino, __return_address);
}
/* Record a corruption while cross-referencing with an inode. */
void
xchk_ino_xref_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_ino_error(sc, ino, __return_address);
}
/* Record corruption in a block indexed by a file fork. */
void
xchk_fblock_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}
/* Record a corruption while cross-referencing a fork block. */
void
xchk_fblock_xref_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}
/*
 * Warn about inodes that need administrative review but are not
 * incorrect.
 */
void
xchk_ino_set_warning(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xchk_ino_warning(sc, ino, __return_address);
}
/* Warn about a block indexed by a file fork that needs review. */
void
xchk_fblock_set_warning(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
}
/* Signal an incomplete scrub. */
void
xchk_set_incomplete(
	struct xfs_scrub	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
	trace_xchk_incomplete(sc, __return_address);
}
/*
 * rmap scrubbing -- compute the number of blocks with a given owner,
 * at least according to the reverse mapping data.
 */

/* Walk context for xchk_count_rmap_ownedby_irec. */
struct xchk_rmap_ownedby_info {
	const struct xfs_owner_info	*oinfo;		/* owner to match */
	xfs_filblks_t			*blocks;	/* running block tally */
};
  302. STATIC int
  303. xchk_count_rmap_ownedby_irec(
  304. struct xfs_btree_cur *cur,
  305. const struct xfs_rmap_irec *rec,
  306. void *priv)
  307. {
  308. struct xchk_rmap_ownedby_info *sroi = priv;
  309. bool irec_attr;
  310. bool oinfo_attr;
  311. irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
  312. oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
  313. if (rec->rm_owner != sroi->oinfo->oi_owner)
  314. return 0;
  315. if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
  316. (*sroi->blocks) += rec->rm_blockcount;
  317. return 0;
  318. }
  319. /*
  320. * Calculate the number of blocks the rmap thinks are owned by something.
  321. * The caller should pass us an rmapbt cursor.
  322. */
  323. int
  324. xchk_count_rmap_ownedby_ag(
  325. struct xfs_scrub *sc,
  326. struct xfs_btree_cur *cur,
  327. const struct xfs_owner_info *oinfo,
  328. xfs_filblks_t *blocks)
  329. {
  330. struct xchk_rmap_ownedby_info sroi = {
  331. .oinfo = oinfo,
  332. .blocks = blocks,
  333. };
  334. *blocks = 0;
  335. return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
  336. &sroi);
  337. }
  338. /*
  339. * AG scrubbing
  340. *
  341. * These helpers facilitate locking an allocation group's header
  342. * buffers, setting up cursors for all btrees that are present, and
  343. * cleaning everything up once we're through.
  344. */
  345. /* Decide if we want to return an AG header read failure. */
  346. static inline bool
  347. want_ag_read_header_failure(
  348. struct xfs_scrub *sc,
  349. unsigned int type)
  350. {
  351. /* Return all AG header read failures when scanning btrees. */
  352. if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
  353. sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
  354. sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
  355. return true;
  356. /*
  357. * If we're scanning a given type of AG header, we only want to
  358. * see read failures from that specific header. We'd like the
  359. * other headers to cross-check them, but this isn't required.
  360. */
  361. if (sc->sm->sm_type == type)
  362. return true;
  363. return false;
  364. }
/*
 * Grab the perag structure and all the headers for an AG.
 *
 * The headers should be released by xchk_ag_free, but as a fail safe we attach
 * all the buffers we grab to the scrub transaction so they'll all be freed
 * when we cancel it.  Returns ENOENT if we can't grab the perag structure.
 */
int
xchk_ag_read_headers(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	struct xchk_ag		*sa)
{
	struct xfs_mount	*mp = sc->mp;
	int			error;

	ASSERT(!sa->pag);
	sa->pag = xfs_perag_get(mp, agno);
	if (!sa->pag)
		return -ENOENT;

	/*
	 * Read the AGI, AGF, and AGFL in that order.  A read failure is only
	 * returned when scrubbing that particular header (or a btree); see
	 * want_ag_read_header_failure.
	 */
	error = xfs_ialloc_read_agi(sa->pag, sc->tp, &sa->agi_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
		return error;

	error = xfs_alloc_read_agf(sa->pag, sc->tp, 0, &sa->agf_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
		return error;

	error = xfs_alloc_read_agfl(sa->pag, sc->tp, &sa->agfl_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
		return error;

	return 0;
}
/* Release all the AG btree cursors. */
void
xchk_ag_btcur_free(
	struct xchk_ag		*sa)
{
	/* Tear down in the opposite order from xchk_ag_btcur_init. */
	if (sa->refc_cur)
		xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
	if (sa->rmap_cur)
		xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
	if (sa->fino_cur)
		xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
	if (sa->ino_cur)
		xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
	if (sa->cnt_cur)
		xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
	if (sa->bno_cur)
		xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);

	/* Clear the pointers so a repeat call doesn't double-delete. */
	sa->refc_cur = NULL;
	sa->rmap_cur = NULL;
	sa->fino_cur = NULL;
	sa->ino_cur = NULL;
	sa->bno_cur = NULL;
	sa->cnt_cur = NULL;
}
/*
 * Initialize all the btree cursors for an AG.
 *
 * A cursor is only created when the corresponding AG header buffer was
 * read, the feature (where optional) is enabled, and the btree is healthy
 * enough to cross-reference against.  Missing cursors stay NULL.
 */
void
xchk_ag_btcur_init(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	struct xfs_mount	*mp = sc->mp;

	if (sa->agf_bp &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) {
		/* Set up a bnobt cursor for cross-referencing. */
		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag, XFS_BTNUM_BNO);
	}

	if (sa->agf_bp &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_CNT)) {
		/* Set up a cntbt cursor for cross-referencing. */
		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag, XFS_BTNUM_CNT);
	}

	/* Set up a inobt cursor for cross-referencing. */
	if (sa->agi_bp &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
		sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
				sa->pag, XFS_BTNUM_INO);
	}

	/* Set up a finobt cursor for cross-referencing. */
	if (sa->agi_bp && xfs_has_finobt(mp) &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
		sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
				sa->pag, XFS_BTNUM_FINO);
	}

	/* Set up a rmapbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_has_rmapbt(mp) &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag);
	}

	/* Set up a refcountbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_has_reflink(mp) &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
				sa->agf_bp, sa->pag);
	}
}
/* Release the AG header context and btree cursors. */
void
xchk_ag_free(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	/* Cursors reference the header buffers, so tear them down first. */
	xchk_ag_btcur_free(sa);
	if (sa->agfl_bp) {
		xfs_trans_brelse(sc->tp, sa->agfl_bp);
		sa->agfl_bp = NULL;
	}
	if (sa->agf_bp) {
		xfs_trans_brelse(sc->tp, sa->agf_bp);
		sa->agf_bp = NULL;
	}
	if (sa->agi_bp) {
		xfs_trans_brelse(sc->tp, sa->agi_bp);
		sa->agi_bp = NULL;
	}
	/* Drop the perag reference last. */
	if (sa->pag) {
		xfs_perag_put(sa->pag);
		sa->pag = NULL;
	}
}
  487. /*
  488. * For scrub, grab the perag structure, the AGI, and the AGF headers, in that
  489. * order. Locking order requires us to get the AGI before the AGF. We use the
  490. * transaction to avoid deadlocking on crosslinked metadata buffers; either the
  491. * caller passes one in (bmap scrub) or we have to create a transaction
  492. * ourselves. Returns ENOENT if the perag struct cannot be grabbed.
  493. */
  494. int
  495. xchk_ag_init(
  496. struct xfs_scrub *sc,
  497. xfs_agnumber_t agno,
  498. struct xchk_ag *sa)
  499. {
  500. int error;
  501. error = xchk_ag_read_headers(sc, agno, sa);
  502. if (error)
  503. return error;
  504. xchk_ag_btcur_init(sc, sa);
  505. return 0;
  506. }
  507. /* Per-scrubber setup functions */
  508. /*
  509. * Grab an empty transaction so that we can re-grab locked buffers if
  510. * one of our btrees turns out to be cyclic.
  511. *
  512. * If we're going to repair something, we need to ask for the largest possible
  513. * log reservation so that we can handle the worst case scenario for metadata
  514. * updates while rebuilding a metadata item. We also need to reserve as many
  515. * blocks in the head transaction as we think we're going to need to rebuild
  516. * the metadata object.
  517. */
  518. int
  519. xchk_trans_alloc(
  520. struct xfs_scrub *sc,
  521. uint resblks)
  522. {
  523. if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
  524. return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
  525. resblks, 0, 0, &sc->tp);
  526. return xfs_trans_alloc_empty(sc->mp, &sc->tp);
  527. }
  528. /* Set us up with a transaction and an empty context. */
  529. int
  530. xchk_setup_fs(
  531. struct xfs_scrub *sc)
  532. {
  533. uint resblks;
  534. resblks = xrep_calc_ag_resblks(sc);
  535. return xchk_trans_alloc(sc, resblks);
  536. }
/* Set us up with AG headers and btree cursors. */
int
xchk_setup_ag_btree(
	struct xfs_scrub	*sc,
	bool			force_log)
{
	struct xfs_mount	*mp = sc->mp;
	int			error;

	/*
	 * If the caller asks us to checkpoint the log, do so.  This
	 * expensive operation should be performed infrequently and only
	 * as a last resort.  Any caller that sets force_log should
	 * document why they need to do so.
	 */
	if (force_log) {
		error = xchk_checkpoint_log(mp);
		if (error)
			return error;
	}

	error = xchk_setup_fs(sc);
	if (error)
		return error;

	return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
}
  561. /* Push everything out of the log onto disk. */
  562. int
  563. xchk_checkpoint_log(
  564. struct xfs_mount *mp)
  565. {
  566. int error;
  567. error = xfs_log_force(mp, XFS_LOG_SYNC);
  568. if (error)
  569. return error;
  570. xfs_ail_push_all_sync(mp->m_ail);
  571. return 0;
  572. }
/*
 * Given an inode and the scrub control structure, grab either the
 * inode referenced in the control structure or the inode passed in.
 * The inode is not locked.
 */
int
xchk_get_inode(
	struct xfs_scrub	*sc)
{
	struct xfs_imap		imap;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip_in = XFS_I(file_inode(sc->file));
	struct xfs_inode	*ip = NULL;
	int			error;

	/* We want to scan the inode we already had opened. */
	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
		sc->ip = ip_in;
		return 0;
	}

	/* Look up the inode, see if the generation number matches. */
	if (xfs_internal_inum(mp, sc->sm->sm_ino))
		return -ENOENT;
	error = xfs_iget(mp, NULL, sc->sm->sm_ino,
			XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
	switch (error) {
	case -ENOENT:
		/* Inode doesn't exist, just bail out. */
		return error;
	case 0:
		/* Got an inode, continue. */
		break;
	case -EINVAL:
		/*
		 * -EINVAL with IGET_UNTRUSTED could mean one of several
		 * things: userspace gave us an inode number that doesn't
		 * correspond to fs space, or doesn't have an inobt entry;
		 * or it could simply mean that the inode buffer failed the
		 * read verifiers.
		 *
		 * Try just the inode mapping lookup -- if it succeeds, then
		 * the inode buffer verifier failed and something needs fixing.
		 * Otherwise, we really couldn't find it so tell userspace
		 * that it no longer exists.
		 */
		error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
				XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
		if (error)
			return -ENOENT;
		error = -EFSCORRUPTED;
		fallthrough;
	default:
		/* Trace any other failure and pass it up to userspace. */
		trace_xchk_op_error(sc,
				XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
				XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
				error, __return_address);
		return error;
	}

	/* Reject the inode if it doesn't match the caller's generation. */
	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
		xfs_irele(ip);
		return -ENOENT;
	}

	sc->ip = ip;
	return 0;
}
/* Set us up to scrub a file's contents. */
int
xchk_setup_inode_contents(
	struct xfs_scrub	*sc,
	unsigned int		resblks)
{
	int			error;

	error = xchk_get_inode(sc);
	if (error)
		return error;

	/* Got the inode, lock it and we're ready to go. */
	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	xfs_ilock(sc->ip, sc->ilock_flags);
	error = xchk_trans_alloc(sc, resblks);
	if (error)
		goto out;
	/* The ILOCK is taken only after the transaction is allocated. */
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
out:
	/* scrub teardown will unlock and release the inode for us */
	return error;
}
  659. /*
  660. * Predicate that decides if we need to evaluate the cross-reference check.
  661. * If there was an error accessing the cross-reference btree, just delete
  662. * the cursor and skip the check.
  663. */
  664. bool
  665. xchk_should_check_xref(
  666. struct xfs_scrub *sc,
  667. int *error,
  668. struct xfs_btree_cur **curpp)
  669. {
  670. /* No point in xref if we already know we're corrupt. */
  671. if (xchk_skip_xref(sc->sm))
  672. return false;
  673. if (*error == 0)
  674. return true;
  675. if (curpp) {
  676. /* If we've already given up on xref, just bail out. */
  677. if (!*curpp)
  678. return false;
  679. /* xref error, delete cursor and bail out. */
  680. xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
  681. *curpp = NULL;
  682. }
  683. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
  684. trace_xchk_xref_error(sc, *error, __return_address);
  685. /*
  686. * Errors encountered during cross-referencing with another
  687. * data structure should not cause this scrubber to abort.
  688. */
  689. *error = 0;
  690. return false;
  691. }
  692. /* Run the structure verifiers on in-memory buffers to detect bad memory. */
  693. void
  694. xchk_buffer_recheck(
  695. struct xfs_scrub *sc,
  696. struct xfs_buf *bp)
  697. {
  698. xfs_failaddr_t fa;
  699. if (bp->b_ops == NULL) {
  700. xchk_block_set_corrupt(sc, bp);
  701. return;
  702. }
  703. if (bp->b_ops->verify_struct == NULL) {
  704. xchk_set_incomplete(sc);
  705. return;
  706. }
  707. fa = bp->b_ops->verify_struct(bp);
  708. if (!fa)
  709. return;
  710. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  711. trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa);
  712. }
/*
 * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
 * pointed to by sc->ip and the ILOCK must be held.
 */
int
xchk_metadata_inode_forks(
	struct xfs_scrub	*sc)
{
	__u32			smtype;	/* saved sm_type across the bmap call */
	bool			shared;
	int			error;

	/* Nothing more to check if the inode is already known corrupt. */
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Metadata inodes don't live on the rt device. */
	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* They should never participate in reflink. */
	if (xfs_is_reflink_inode(sc->ip)) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* They also should never have extended attributes. */
	if (xfs_inode_hasattr(sc->ip)) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/*
	 * Invoke the data fork scrubber.  Temporarily switch sm_type so the
	 * bmap scrubber runs as the BMBTD check, then restore it.
	 */
	smtype = sc->sm->sm_type;
	sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
	error = xchk_bmap_data(sc);
	sc->sm->sm_type = smtype;
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		return error;

	/* Look for incorrect shared blocks. */
	if (xfs_has_reflink(sc->mp)) {
		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
				&shared);
		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
				&error))
			return error;
		if (shared)
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
	}

	return error;
}
  760. /*
  761. * Try to lock an inode in violation of the usual locking order rules. For
  762. * example, trying to get the IOLOCK while in transaction context, or just
  763. * plain breaking AG-order or inode-order inode locking rules. Either way,
  764. * the only way to avoid an ABBA deadlock is to use trylock and back off if
  765. * we can't.
  766. */
  767. int
  768. xchk_ilock_inverted(
  769. struct xfs_inode *ip,
  770. uint lock_mode)
  771. {
  772. int i;
  773. for (i = 0; i < 20; i++) {
  774. if (xfs_ilock_nowait(ip, lock_mode))
  775. return 0;
  776. delay(1);
  777. }
  778. return -EDEADLOCK;
  779. }