/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2018 Red Hat, Inc.
 * All rights reserved.
 */

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_health.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_defer.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_trace.h"
#include "xfs_inode.h"
#include "xfs_icache.h"

/*
 * Passive reference counting access wrappers to the perag structures. If the
 * per-ag structure is to be freed, the freeing code is responsible for
 * cleaning up objects with passive references before freeing the structure.
 * This includes things like cached buffers.
 */
struct xfs_perag *
xfs_perag_get(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_perag	*pag;
	int			ref = 0;

	rcu_read_lock();
	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
	if (pag) {
		ASSERT(atomic_read(&pag->pag_ref) >= 0);
		ref = atomic_inc_return(&pag->pag_ref);
	}
	rcu_read_unlock();
	trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
	return pag;
}

/*
 * Search from @first to find the next perag with the given tag set.
 */
struct xfs_perag *
xfs_perag_get_tag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		first,
	unsigned int		tag)
{
	struct xfs_perag	*pag;
	int			found;
	int			ref;

	rcu_read_lock();
	found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
					(void **)&pag, first, 1, tag);
	if (found <= 0) {
		rcu_read_unlock();
		return NULL;
	}
	ref = atomic_inc_return(&pag->pag_ref);
	rcu_read_unlock();
	trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
	return pag;
}
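
/*
 * Usage sketch (a minimal illustration, not a caller in this file): the
 * inode cache walkers iterate all tagged AGs by restarting the lookup
 * one AG past the last hit, e.g.:
 *
 *	agno = 0;
 *	while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_BLOCKGC_TAG))) {
 *		agno = pag->pag_agno + 1;
 *		... operate on this AG's inodes ...
 *		xfs_perag_put(pag);
 *	}
 */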

void
xfs_perag_put(
	struct xfs_perag	*pag)
{
	int	ref;

	ASSERT(atomic_read(&pag->pag_ref) > 0);
	ref = atomic_dec_return(&pag->pag_ref);
	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
}
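
/*
 * Usage sketch: every xfs_perag_get() must be balanced by an
 * xfs_perag_put() once the caller is done with the structure, as in the
 * canonical per-AG walk (this is exactly the pattern
 * xfs_initialize_perag_data() below uses):
 *
 *	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
 *		pag = xfs_perag_get(mp, agno);
 *		... use pag ...
 *		xfs_perag_put(pag);
 *	}
 */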

/*
 * xfs_initialize_perag_data
 *
 * Read in each per-ag structure so we can count up the number of
 * allocated inodes, free inodes and used filesystem blocks as this
 * information is no longer persistent in the superblock. Once we have
 * this information, write it into the in-core superblock structure.
 */
int
xfs_initialize_perag_data(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agcount)
{
	xfs_agnumber_t		index;
	struct xfs_perag	*pag;
	struct xfs_sb		*sbp = &mp->m_sb;
	uint64_t		ifree = 0;
	uint64_t		ialloc = 0;
	uint64_t		bfree = 0;
	uint64_t		bfreelst = 0;
	uint64_t		btree = 0;
	uint64_t		fdblocks;
	int			error = 0;

	for (index = 0; index < agcount; index++) {
		/*
		 * Read the AGF and AGI buffers to populate the per-ag
		 * structures for us.
		 */
		pag = xfs_perag_get(mp, index);
		error = xfs_alloc_read_agf(pag, NULL, 0, NULL);
		if (!error)
			error = xfs_ialloc_read_agi(pag, NULL, NULL);
		if (error) {
			xfs_perag_put(pag);
			return error;
		}

		ifree += pag->pagi_freecount;
		ialloc += pag->pagi_count;
		bfree += pag->pagf_freeblks;
		bfreelst += pag->pagf_flcount;
		btree += pag->pagf_btreeblks;
		xfs_perag_put(pag);
	}
	fdblocks = bfree + bfreelst + btree;

	/*
	 * If the new summary counts are obviously incorrect, fail the
	 * mount operation because that implies the AGFs are also corrupt.
	 * Clear FS_COUNTERS so that we don't unmount with a dirty log, which
	 * will prevent xfs_repair from fixing anything.
	 */
	if (fdblocks > sbp->sb_dblocks || ifree > ialloc) {
		xfs_alert(mp, "AGF corruption. Please run xfs_repair.");
		error = -EFSCORRUPTED;
		goto out;
	}

	/* Overwrite incore superblock counters with just-read data */
	spin_lock(&mp->m_sb_lock);
	sbp->sb_ifree = ifree;
	sbp->sb_icount = ialloc;
	sbp->sb_fdblocks = fdblocks;
	spin_unlock(&mp->m_sb_lock);

	xfs_reinit_percpu_counters(mp);
out:
	xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
	return error;
}

STATIC void
__xfs_free_perag(
	struct rcu_head	*head)
{
	struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);

	ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
	kmem_free(pag);
}

/*
 * Free up the per-ag resources associated with the mount structure.
 */
void
xfs_free_perag(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		spin_lock(&mp->m_perag_lock);
		pag = radix_tree_delete(&mp->m_perag_tree, agno);
		spin_unlock(&mp->m_perag_lock);
		ASSERT(pag);
		XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
		cancel_delayed_work_sync(&pag->pag_blockgc_work);
		xfs_buf_hash_destroy(pag);
		call_rcu(&pag->rcu_head, __xfs_free_perag);
	}
}

/* Find the size of the AG, in blocks. */
static xfs_agblock_t
__xfs_ag_block_count(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agnumber_t		agcount,
	xfs_rfsblock_t		dblocks)
{
	ASSERT(agno < agcount);

	if (agno < agcount - 1)
		return mp->m_sb.sb_agblocks;
	return dblocks - (agno * mp->m_sb.sb_agblocks);
}

xfs_agblock_t
xfs_ag_block_count(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	return __xfs_ag_block_count(mp, agno, mp->m_sb.sb_agcount,
			mp->m_sb.sb_dblocks);
}
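
/*
 * Worked example (hypothetical geometry): with sb_agblocks = 100000,
 * sb_dblocks = 350000 and sb_agcount = 4, AGs 0-2 are each 100000
 * blocks long, while the last AG only gets what is left over:
 *
 *	350000 - (3 * 100000) = 50000 blocks
 */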

/* Calculate the first and last possible inode number in an AG. */
static void
__xfs_agino_range(
	struct xfs_mount	*mp,
	xfs_agblock_t		eoag,
	xfs_agino_t		*first,
	xfs_agino_t		*last)
{
	xfs_agblock_t		bno;

	/*
	 * Calculate the first inode, which will be in the first
	 * cluster-aligned block after the AGFL.
	 */
	bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align);
	*first = XFS_AGB_TO_AGINO(mp, bno);

	/*
	 * Calculate the last inode, which will be at the end of the
	 * last (aligned) cluster that can be allocated in the AG.
	 */
	bno = round_down(eoag, M_IGEO(mp)->cluster_align);
	*last = XFS_AGB_TO_AGINO(mp, bno) - 1;
}

void
xfs_agino_range(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agino_t		*first,
	xfs_agino_t		*last)
{
	return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last);
}
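
/*
 * Worked example (hypothetical geometry): assuming XFS_AGFL_BLOCK(mp) = 3
 * and cluster_align = 4, the first inode-capable block is
 * round_up(3 + 1, 4) = agbno 4, so *first is the first inode number in
 * that block; with eoag = 50000, the last aligned cluster ends at
 * round_down(50000, 4) = 50000, and *last is the inode number just
 * before the first inode of block 50000.
 */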

int
xfs_initialize_perag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agcount,
	xfs_rfsblock_t		dblocks,
	xfs_agnumber_t		*maxagi)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		index;
	xfs_agnumber_t		first_initialised = NULLAGNUMBER;
	int			error;

	/*
	 * Walk the current per-ag tree so we don't try to initialise AGs
	 * that already exist (growfs case). Allocate and insert all the
	 * AGs we don't find ready for initialisation.
	 */
	for (index = 0; index < agcount; index++) {
		pag = xfs_perag_get(mp, index);
		if (pag) {
			xfs_perag_put(pag);
			continue;
		}

		pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
		if (!pag) {
			error = -ENOMEM;
			goto out_unwind_new_pags;
		}
		pag->pag_agno = index;
		pag->pag_mount = mp;

		error = radix_tree_preload(GFP_NOFS);
		if (error)
			goto out_free_pag;

		spin_lock(&mp->m_perag_lock);
		if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
			WARN_ON_ONCE(1);
			spin_unlock(&mp->m_perag_lock);
			radix_tree_preload_end();
			error = -EEXIST;
			goto out_free_pag;
		}
		spin_unlock(&mp->m_perag_lock);
		radix_tree_preload_end();

#ifdef __KERNEL__
		/* Place kernel structure only init below this point. */
		spin_lock_init(&pag->pag_ici_lock);
		spin_lock_init(&pag->pagb_lock);
		spin_lock_init(&pag->pag_state_lock);
		INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
		init_waitqueue_head(&pag->pagb_wait);
		pag->pagb_count = 0;
		pag->pagb_tree = RB_ROOT;
#endif /* __KERNEL__ */

		error = xfs_buf_hash_init(pag);
		if (error)
			goto out_remove_pag;

		/* first new pag is fully initialized */
		if (first_initialised == NULLAGNUMBER)
			first_initialised = index;

		/*
		 * Pre-calculated geometry
		 */
		pag->block_count = __xfs_ag_block_count(mp, index, agcount,
				dblocks);
		pag->min_block = XFS_AGFL_BLOCK(mp);
		__xfs_agino_range(mp, pag->block_count, &pag->agino_min,
				&pag->agino_max);
	}

	index = xfs_set_inode_alloc(mp, agcount);

	if (maxagi)
		*maxagi = index;

	mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
	return 0;

out_remove_pag:
	radix_tree_delete(&mp->m_perag_tree, index);
out_free_pag:
	kmem_free(pag);
out_unwind_new_pags:
	/* unwind any prior newly initialized pags */
	for (index = first_initialised; index < agcount; index++) {
		pag = radix_tree_delete(&mp->m_perag_tree, index);
		if (!pag)
			break;
		xfs_buf_hash_destroy(pag);
		kmem_free(pag);
	}
	return error;
}
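
/*
 * Calling sketch (assuming the usual mount-time convention): mount sets
 * up all AGs from the on-disk geometry, e.g.:
 *
 *	error = xfs_initialize_perag(mp, sbp->sb_agcount,
 *			mp->m_sb.sb_dblocks, &mp->m_maxagi);
 *
 * Growfs calls it again with the enlarged agcount; the walk above skips
 * AGs that already have a perag and only fills in the new ones.
 */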

static int
xfs_get_aghdr_buf(
	struct xfs_mount	*mp,
	xfs_daddr_t		blkno,
	size_t			numblks,
	struct xfs_buf		**bpp,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;
	int			error;

	error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0, &bp);
	if (error)
		return error;

	bp->b_maps[0].bm_bn = blkno;
	bp->b_ops = ops;

	*bpp = bp;
	return 0;
}

/*
 * Generic btree root block init function
 */
static void
xfs_btroot_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno);
}

/* Finish initializing a free space btree. */
static void
xfs_freesp_init_recs(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_alloc_rec	*arec;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);

	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);

	if (xfs_ag_contains_log(mp, id->agno)) {
		struct xfs_alloc_rec	*nrec;
		xfs_agblock_t		start = XFS_FSB_TO_AGBNO(mp,
							mp->m_sb.sb_logstart);

		ASSERT(start >= mp->m_ag_prealloc_blocks);
		if (start != mp->m_ag_prealloc_blocks) {
			/*
			 * Modify first record to pad stripe align of log
			 */
			arec->ar_blockcount = cpu_to_be32(start -
						mp->m_ag_prealloc_blocks);
			nrec = arec + 1;

			/*
			 * Insert second record at start of internal log
			 * which then gets trimmed.
			 */
			nrec->ar_startblock = cpu_to_be32(
					be32_to_cpu(arec->ar_startblock) +
					be32_to_cpu(arec->ar_blockcount));
			arec = nrec;
			be16_add_cpu(&block->bb_numrecs, 1);
		}
		/*
		 * Change record start to after the internal log
		 */
		be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks);
	}

	/*
	 * Calculate the record block count and check for the case where
	 * the log might have consumed all available space in the AG. If
	 * so, reset the record count to 0 to avoid exposure of an invalid
	 * record start block.
	 */
	arec->ar_blockcount = cpu_to_be32(id->agsize -
					be32_to_cpu(arec->ar_startblock));
	if (!arec->ar_blockcount)
		block->bb_numrecs = 0;
}

/*
 * Alloc btree root block init functions
 */
static void
xfs_bnoroot_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
	xfs_freesp_init_recs(mp, bp, id);
}

static void
xfs_cntroot_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
	xfs_freesp_init_recs(mp, bp, id);
}

/*
 * Reverse map root block init
 */
static void
xfs_rmaproot_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_rmap_rec	*rrec;

	xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno);

	/*
	 * Mark the AG header regions as static metadata. The BNO
	 * btree block is the first block after the headers, so
	 * its location defines the size of region the static
	 * metadata consumes.
	 *
	 * Note: unlike mkfs, we never have to account for log
	 * space when growing the data regions
	 */
	rrec = XFS_RMAP_REC_ADDR(block, 1);
	rrec->rm_startblock = 0;
	rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
	rrec->rm_offset = 0;

	/* account freespace btree root blocks */
	rrec = XFS_RMAP_REC_ADDR(block, 2);
	rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
	rrec->rm_blockcount = cpu_to_be32(2);
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
	rrec->rm_offset = 0;

	/* account inode btree root blocks */
	rrec = XFS_RMAP_REC_ADDR(block, 3);
	rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
	rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
					XFS_IBT_BLOCK(mp));
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
	rrec->rm_offset = 0;

	/* account for rmap btree root */
	rrec = XFS_RMAP_REC_ADDR(block, 4);
	rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
	rrec->rm_blockcount = cpu_to_be32(1);
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
	rrec->rm_offset = 0;

	/* account for refc btree root */
	if (xfs_has_reflink(mp)) {
		rrec = XFS_RMAP_REC_ADDR(block, 5);
		rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
		rrec->rm_blockcount = cpu_to_be32(1);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);
	}

	/* account for the log space */
	if (xfs_ag_contains_log(mp, id->agno)) {
		rrec = XFS_RMAP_REC_ADDR(block,
				be16_to_cpu(block->bb_numrecs) + 1);
		rrec->rm_startblock = cpu_to_be32(
				XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart));
		rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);
	}
}
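
/*
 * In summary, the freshly initialised rmapbt root describes this layout
 * (the last two records are conditional, per the code above):
 *
 *	[0, XFS_BNO_BLOCK)              OWN_FS    AG headers
 *	[XFS_BNO_BLOCK, +2)             OWN_AG    bnobt/cntbt roots
 *	[XFS_IBT_BLOCK, XFS_RMAP_BLOCK) OWN_INOBT inobt/finobt roots
 *	[XFS_RMAP_BLOCK, +1)            OWN_AG    rmapbt root
 *	[xfs_refc_block, +1)            OWN_REFC  refcountbt root (reflink)
 *	[logstart, +logblocks)          OWN_LOG   internal log (log AG only)
 */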

/*
 * Initialise new secondary superblocks with the pre-grow geometry, but mark
 * them as "in progress" so we know they haven't yet been activated. This will
 * get cleared when the update with the new geometry information is done after
 * changes to the primary are committed. This isn't strictly necessary, but we
 * get it for free with the delayed buffer write lists and it means we can tell
 * if a grow operation didn't complete properly after the fact.
 */
static void
xfs_sbblock_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_dsb		*dsb = bp->b_addr;

	xfs_sb_to_disk(dsb, &mp->m_sb);
	dsb->sb_inprogress = 1;
}

static void
xfs_agfblock_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_agf		*agf = bp->b_addr;
	xfs_extlen_t		tmpsize;

	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
	agf->agf_seqno = cpu_to_be32(id->agno);
	agf->agf_length = cpu_to_be32(id->agsize);
	agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
	agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
	if (xfs_has_rmapbt(mp)) {
		agf->agf_roots[XFS_BTNUM_RMAPi] =
					cpu_to_be32(XFS_RMAP_BLOCK(mp));
		agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
		agf->agf_rmap_blocks = cpu_to_be32(1);
	}

	agf->agf_flfirst = cpu_to_be32(1);
	agf->agf_fllast = 0;
	agf->agf_flcount = 0;
	tmpsize = id->agsize - mp->m_ag_prealloc_blocks;
	agf->agf_freeblks = cpu_to_be32(tmpsize);
	agf->agf_longest = cpu_to_be32(tmpsize);
	if (xfs_has_crc(mp))
		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
	if (xfs_has_reflink(mp)) {
		agf->agf_refcount_root = cpu_to_be32(
				xfs_refc_block(mp));
		agf->agf_refcount_level = cpu_to_be32(1);
		agf->agf_refcount_blocks = cpu_to_be32(1);
	}
	if (xfs_ag_contains_log(mp, id->agno)) {
		int64_t	logblocks = mp->m_sb.sb_logblocks;

		be32_add_cpu(&agf->agf_freeblks, -logblocks);
		agf->agf_longest = cpu_to_be32(id->agsize -
			XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks);
	}
}

static void
xfs_agflblock_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_agfl		*agfl = XFS_BUF_TO_AGFL(bp);
	__be32			*agfl_bno;
	int			bucket;

	if (xfs_has_crc(mp)) {
		agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
		agfl->agfl_seqno = cpu_to_be32(id->agno);
		uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
	}

	agfl_bno = xfs_buf_to_agfl_bno(bp);
	for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
		agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
}

static void
xfs_agiblock_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_agi		*agi = bp->b_addr;
	int			bucket;

	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
	agi->agi_seqno = cpu_to_be32(id->agno);
	agi->agi_length = cpu_to_be32(id->agsize);
	agi->agi_count = 0;
	agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
	agi->agi_level = cpu_to_be32(1);
	agi->agi_freecount = 0;
	agi->agi_newino = cpu_to_be32(NULLAGINO);
	agi->agi_dirino = cpu_to_be32(NULLAGINO);
	if (xfs_has_crc(mp))
		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
	if (xfs_has_finobt(mp)) {
		agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
		agi->agi_free_level = cpu_to_be32(1);
	}
	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
	if (xfs_has_inobtcounts(mp)) {
		agi->agi_iblocks = cpu_to_be32(1);
		if (xfs_has_finobt(mp))
			agi->agi_fblocks = cpu_to_be32(1);
	}
}

typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp,
				  struct aghdr_init_data *id);

static int
xfs_ag_init_hdr(
	struct xfs_mount	*mp,
	struct aghdr_init_data	*id,
	aghdr_init_work_f	work,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;
	int			error;

	error = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, &bp, ops);
	if (error)
		return error;

	(*work)(mp, bp, id);

	xfs_buf_delwri_queue(bp, &id->buffer_list);
	xfs_buf_relse(bp);
	return 0;
}

struct xfs_aghdr_grow_data {
	xfs_daddr_t		daddr;
	size_t			numblks;
	const struct xfs_buf_ops *ops;
	aghdr_init_work_f	work;
	xfs_btnum_t		type;
	bool			need_init;
};

/*
 * Prepare new AG headers to be written to disk. We use uncached buffers here,
 * as it is assumed these new AG headers are beyond the currently valid
 * filesystem address space. Using cached buffers would trip over EOFS
 * corruption detection algorithms in the buffer cache lookup routines.
 *
 * This is a non-transactional function, but the prepared buffers are added to
 * a delayed write buffer list supplied by the caller so they can submit them
 * to disk and wait on them as required.
 */
int
xfs_ag_init_headers(
	struct xfs_mount	*mp,
	struct aghdr_init_data	*id)
{
	struct xfs_aghdr_grow_data aghdr_data[] = {
	{ /* SB */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_sb_buf_ops,
		.work = &xfs_sbblock_init,
		.need_init = true
	},
	{ /* AGF */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_agf_buf_ops,
		.work = &xfs_agfblock_init,
		.need_init = true
	},
	{ /* AGFL */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_agfl_buf_ops,
		.work = &xfs_agflblock_init,
		.need_init = true
	},
	{ /* AGI */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_agi_buf_ops,
		.work = &xfs_agiblock_init,
		.need_init = true
	},
	{ /* BNO root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_bnobt_buf_ops,
		.work = &xfs_bnoroot_init,
		.need_init = true
	},
	{ /* CNT root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_cntbt_buf_ops,
		.work = &xfs_cntroot_init,
		.need_init = true
	},
	{ /* INO root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_inobt_buf_ops,
		.work = &xfs_btroot_init,
		.type = XFS_BTNUM_INO,
		.need_init = true
	},
	{ /* FINO root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_finobt_buf_ops,
		.work = &xfs_btroot_init,
		.type = XFS_BTNUM_FINO,
		.need_init = xfs_has_finobt(mp)
	},
	{ /* RMAP root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_rmapbt_buf_ops,
		.work = &xfs_rmaproot_init,
		.need_init = xfs_has_rmapbt(mp)
	},
	{ /* REFC root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_refcountbt_buf_ops,
		.work = &xfs_btroot_init,
		.type = XFS_BTNUM_REFC,
		.need_init = xfs_has_reflink(mp)
	},
	{ /* NULL terminating block */
		.daddr = XFS_BUF_DADDR_NULL,
	}
	};
	struct xfs_aghdr_grow_data *dp;
	int			error = 0;

	/* Account for AG free space in new AG */
	id->nfree += id->agsize - mp->m_ag_prealloc_blocks;
	for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) {
		if (!dp->need_init)
			continue;

		id->daddr = dp->daddr;
		id->numblks = dp->numblks;
		id->type = dp->type;
		error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops);
		if (error)
			break;
	}
	return error;
}
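
/*
 * Usage sketch (assuming the growfs-style calling convention): the
 * caller fills in the AG number and size, supplies an empty delwri
 * list, and submits the queued buffers itself once this returns:
 *
 *	struct aghdr_init_data	id = { .agno = agno, .agsize = agsize };
 *
 *	INIT_LIST_HEAD(&id.buffer_list);
 *	error = xfs_ag_init_headers(mp, &id);
 *	if (!error)
 *		error = xfs_buf_delwri_submit(&id.buffer_list);
 */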

int
xfs_ag_shrink_space(
	struct xfs_perag	*pag,
	struct xfs_trans	**tpp,
	xfs_extlen_t		delta)
{
	struct xfs_mount	*mp = pag->pag_mount;
	struct xfs_alloc_arg	args = {
		.tp	= *tpp,
		.mp	= mp,
		.type	= XFS_ALLOCTYPE_THIS_BNO,
		.minlen = delta,
		.maxlen = delta,
		.oinfo	= XFS_RMAP_OINFO_SKIP_UPDATE,
		.resv	= XFS_AG_RESV_NONE,
		.prod	= 1
	};
	struct xfs_buf		*agibp, *agfbp;
	struct xfs_agi		*agi;
	struct xfs_agf		*agf;
	xfs_agblock_t		aglen;
	int			error, err2;

	ASSERT(pag->pag_agno == mp->m_sb.sb_agcount - 1);
	error = xfs_ialloc_read_agi(pag, *tpp, &agibp);
	if (error)
		return error;

	agi = agibp->b_addr;

	error = xfs_alloc_read_agf(pag, *tpp, 0, &agfbp);
	if (error)
		return error;

	agf = agfbp->b_addr;
	aglen = be32_to_cpu(agi->agi_length);
	/* some extra paranoid checks before we shrink the ag */
	if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length))
		return -EFSCORRUPTED;
	if (delta >= aglen)
		return -EINVAL;

	args.fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta);

	/*
	 * Make sure that the last inode cluster cannot overlap with the new
	 * end of the AG, even if it's sparse.
	 */
	error = xfs_ialloc_check_shrink(*tpp, pag->pag_agno, agibp,
			aglen - delta);
	if (error)
		return error;

	/*
	 * Disable perag reservations so it doesn't cause the allocation
	 * request to fail. We'll reestablish reservation before we return.
	 */
	error = xfs_ag_resv_free(pag);
	if (error)
		return error;

	/* internal log shouldn't also show up in the free space btrees */
	error = xfs_alloc_vextent(&args);
	if (!error && args.agbno == NULLAGBLOCK)
		error = -ENOSPC;

	if (error) {
		/*
		 * If extent allocation fails, we need to roll the transaction
		 * to ensure that the AGFL fixup has been committed anyway.
		 */
		xfs_trans_bhold(*tpp, agfbp);
		err2 = xfs_trans_roll(tpp);
		if (err2)
			return err2;
		xfs_trans_bjoin(*tpp, agfbp);
		goto resv_init_out;
	}

	/*
	 * If successfully deleted from the freespace btrees, we need to
	 * confirm the per-AG reservation works as expected.
	 */
	be32_add_cpu(&agi->agi_length, -delta);
	be32_add_cpu(&agf->agf_length, -delta);

	err2 = xfs_ag_resv_init(pag, *tpp);
	if (err2) {
		be32_add_cpu(&agi->agi_length, delta);
		be32_add_cpu(&agf->agf_length, delta);
		if (err2 != -ENOSPC)
			goto resv_err;

		__xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true);

		/*
		 * Roll the transaction before trying to re-init the per-ag
		 * reservation. The new transaction is clean so it will cancel
		 * without any side effects.
		 */
		error = xfs_defer_finish(tpp);
		if (error)
			return error;

		error = -ENOSPC;
		goto resv_init_out;
	}
	xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH);
	xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH);
	return 0;

resv_init_out:
	err2 = xfs_ag_resv_init(pag, *tpp);
	if (!err2)
		return error;
resv_err:
	xfs_warn(mp, "Error %d reserving per-AG metadata reserve pool.", err2);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return err2;
}

/*
 * Extend the AG indicated by @pag by the length passed in.
 */
int
xfs_ag_extend_space(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	xfs_extlen_t		len)
{
	struct xfs_buf		*bp;
	struct xfs_agi		*agi;
	struct xfs_agf		*agf;
	int			error;

	ASSERT(pag->pag_agno == pag->pag_mount->m_sb.sb_agcount - 1);

	error = xfs_ialloc_read_agi(pag, tp, &bp);
	if (error)
		return error;

	agi = bp->b_addr;
	be32_add_cpu(&agi->agi_length, len);
	xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);

	/*
	 * Change agf length.
	 */
	error = xfs_alloc_read_agf(pag, tp, 0, &bp);
	if (error)
		return error;

	agf = bp->b_addr;
	be32_add_cpu(&agf->agf_length, len);
	ASSERT(agf->agf_length == agi->agi_length);
	xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);

	/*
	 * Free the new space.
	 *
	 * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that
	 * this doesn't actually exist in the rmap btree.
	 */
	error = xfs_rmap_free(tp, bp, pag, be32_to_cpu(agf->agf_length) - len,
				len, &XFS_RMAP_OINFO_SKIP_UPDATE);
	if (error)
		return error;

	error = xfs_free_extent(tp, XFS_AGB_TO_FSB(pag->pag_mount,
					pag->pag_agno,
					be32_to_cpu(agf->agf_length) - len),
				len, &XFS_RMAP_OINFO_SKIP_UPDATE,
				XFS_AG_RESV_NONE);
	if (error)
		return error;

	/* Update perag geometry */
	pag->block_count = be32_to_cpu(agf->agf_length);
	__xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min,
				&pag->agino_max);
	return 0;
}

/* Retrieve AG geometry. */
int
xfs_ag_get_geometry(
	struct xfs_perag	*pag,
	struct xfs_ag_geometry	*ageo)
{
	struct xfs_buf		*agi_bp;
	struct xfs_buf		*agf_bp;
	struct xfs_agi		*agi;
	struct xfs_agf		*agf;
	unsigned int		freeblks;
	int			error;

	/* Lock the AG headers. */
	error = xfs_ialloc_read_agi(pag, NULL, &agi_bp);
	if (error)
		return error;
	error = xfs_alloc_read_agf(pag, NULL, 0, &agf_bp);
	if (error)
		goto out_agi;

	/* Fill out form. */
	memset(ageo, 0, sizeof(*ageo));
	ageo->ag_number = pag->pag_agno;

	agi = agi_bp->b_addr;
	ageo->ag_icount = be32_to_cpu(agi->agi_count);
	ageo->ag_ifree = be32_to_cpu(agi->agi_freecount);

	agf = agf_bp->b_addr;
	ageo->ag_length = be32_to_cpu(agf->agf_length);
	freeblks = pag->pagf_freeblks +
		   pag->pagf_flcount +
		   pag->pagf_btreeblks -
		   xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE);
	ageo->ag_freeblks = freeblks;
	xfs_ag_geom_health(pag, ageo);

	/* Release resources. */
	xfs_buf_relse(agf_bp);
out_agi:
	xfs_buf_relse(agi_bp);
	return error;
}
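
/*
 * Usage sketch (assuming the XFS_IOC_AG_GEOMETRY ioctl convention): the
 * ioctl handler looks up the perag, fills the caller's structure and
 * drops its reference, e.g.:
 *
 *	pag = xfs_perag_get(mp, ageo.ag_number);
 *	if (!pag)
 *		return -EINVAL;
 *	error = xfs_ag_get_geometry(pag, &ageo);
 *	xfs_perag_put(pag);
 */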