super.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Simple file system for zoned block devices exposing zones as files.
  4. *
  5. * Copyright (C) 2019 Western Digital Corporation or its affiliates.
  6. */
  7. #include <linux/module.h>
  8. #include <linux/pagemap.h>
  9. #include <linux/magic.h>
  10. #include <linux/iomap.h>
  11. #include <linux/init.h>
  12. #include <linux/slab.h>
  13. #include <linux/blkdev.h>
  14. #include <linux/statfs.h>
  15. #include <linux/writeback.h>
  16. #include <linux/quotaops.h>
  17. #include <linux/seq_file.h>
  18. #include <linux/parser.h>
  19. #include <linux/uio.h>
  20. #include <linux/mman.h>
  21. #include <linux/sched/mm.h>
  22. #include <linux/crc32.h>
  23. #include <linux/task_io_accounting_ops.h>
  24. #include "zonefs.h"
  25. #define CREATE_TRACE_POINTS
  26. #include "trace.h"
  27. /*
  28. * Get the name of a zone group directory.
  29. */
  30. static const char *zonefs_zgroup_name(enum zonefs_ztype ztype)
  31. {
  32. switch (ztype) {
  33. case ZONEFS_ZTYPE_CNV:
  34. return "cnv";
  35. case ZONEFS_ZTYPE_SEQ:
  36. return "seq";
  37. default:
  38. WARN_ON_ONCE(1);
  39. return "???";
  40. }
  41. }
  42. /*
  43. * Manage the active zone count.
  44. */
  45. static void zonefs_account_active(struct super_block *sb,
  46. struct zonefs_zone *z)
  47. {
  48. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  49. if (zonefs_zone_is_cnv(z))
  50. return;
  51. /*
  52. * For zones that transitioned to the offline or readonly condition,
  53. * we only need to clear the active state.
  54. */
  55. if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY))
  56. goto out;
  57. /*
  58. * If the zone is active, that is, if it is explicitly open or
  59. * partially written, check if it was already accounted as active.
  60. */
  61. if ((z->z_flags & ZONEFS_ZONE_OPEN) ||
  62. (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) {
  63. if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) {
  64. z->z_flags |= ZONEFS_ZONE_ACTIVE;
  65. atomic_inc(&sbi->s_active_seq_files);
  66. }
  67. return;
  68. }
  69. out:
  70. /* The zone is not active. If it was, update the active count */
  71. if (z->z_flags & ZONEFS_ZONE_ACTIVE) {
  72. z->z_flags &= ~ZONEFS_ZONE_ACTIVE;
  73. atomic_dec(&sbi->s_active_seq_files);
  74. }
  75. }
  76. /*
  77. * Manage the active zone count. Called with zi->i_truncate_mutex held.
  78. */
  79. void zonefs_inode_account_active(struct inode *inode)
  80. {
  81. lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex);
  82. return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode));
  83. }
  84. /*
  85. * Execute a zone management operation.
  86. */
  87. static int zonefs_zone_mgmt(struct super_block *sb,
  88. struct zonefs_zone *z, enum req_op op)
  89. {
  90. int ret;
  91. /*
  92. * With ZNS drives, closing an explicitly open zone that has not been
  93. * written will change the zone state to "closed", that is, the zone
  94. * will remain active. Since this can then cause failure of explicit
  95. * open operation on other zones if the drive active zone resources
  96. * are exceeded, make sure that the zone does not remain active by
  97. * resetting it.
  98. */
  99. if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset)
  100. op = REQ_OP_ZONE_RESET;
  101. trace_zonefs_zone_mgmt(sb, z, op);
  102. ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector,
  103. z->z_size >> SECTOR_SHIFT, GFP_NOFS);
  104. if (ret) {
  105. zonefs_err(sb,
  106. "Zone management operation %s at %llu failed %d\n",
  107. blk_op_str(op), z->z_sector, ret);
  108. return ret;
  109. }
  110. return 0;
  111. }
  112. int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op)
  113. {
  114. lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex);
  115. return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op);
  116. }
  117. void zonefs_i_size_write(struct inode *inode, loff_t isize)
  118. {
  119. struct zonefs_zone *z = zonefs_inode_zone(inode);
  120. i_size_write(inode, isize);
  121. /*
  122. * A full zone is no longer open/active and does not need
  123. * explicit closing.
  124. */
  125. if (isize >= z->z_capacity) {
  126. struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
  127. if (z->z_flags & ZONEFS_ZONE_ACTIVE)
  128. atomic_dec(&sbi->s_active_seq_files);
  129. z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE);
  130. }
  131. }
  132. void zonefs_update_stats(struct inode *inode, loff_t new_isize)
  133. {
  134. struct super_block *sb = inode->i_sb;
  135. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  136. loff_t old_isize = i_size_read(inode);
  137. loff_t nr_blocks;
  138. if (new_isize == old_isize)
  139. return;
  140. spin_lock(&sbi->s_lock);
  141. /*
  142. * This may be called for an update after an IO error.
  143. * So beware of the values seen.
  144. */
  145. if (new_isize < old_isize) {
  146. nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits;
  147. if (sbi->s_used_blocks > nr_blocks)
  148. sbi->s_used_blocks -= nr_blocks;
  149. else
  150. sbi->s_used_blocks = 0;
  151. } else {
  152. sbi->s_used_blocks +=
  153. (new_isize - old_isize) >> sb->s_blocksize_bits;
  154. if (sbi->s_used_blocks > sbi->s_blocks)
  155. sbi->s_used_blocks = sbi->s_blocks;
  156. }
  157. spin_unlock(&sbi->s_lock);
  158. }
/*
 * Check a zone condition. Return the amount of written (and still readable)
 * data in the zone. As a side effect, records the offline/read-only
 * condition in the zonefs zone flags.
 */
static loff_t zonefs_check_zone_condition(struct super_block *sb,
					  struct zonefs_zone *z,
					  struct blk_zone *zone)
{
	switch (zone->cond) {
	case BLK_ZONE_COND_OFFLINE:
		/* Offline zones expose no readable data at all. */
		zonefs_warn(sb, "Zone %llu: offline zone\n",
			    z->z_sector);
		z->z_flags |= ZONEFS_ZONE_OFFLINE;
		return 0;
	case BLK_ZONE_COND_READONLY:
		/*
		 * The write pointer of read-only zones is invalid, so we cannot
		 * determine the zone wpoffset (inode size). We thus keep the
		 * zone wpoffset as is, which leads to an empty file
		 * (wpoffset == 0) on mount. For a runtime error, this keeps
		 * the inode size as it was when last updated so that the user
		 * can recover data.
		 */
		zonefs_warn(sb, "Zone %llu: read-only zone\n",
			    z->z_sector);
		z->z_flags |= ZONEFS_ZONE_READONLY;
		if (zonefs_zone_is_cnv(z))
			return z->z_capacity;
		return z->z_wpoffset;
	case BLK_ZONE_COND_FULL:
		/* The write pointer of full zones is invalid. */
		return z->z_capacity;
	default:
		/*
		 * Usable zone: conventional zones are always fully readable,
		 * sequential zones are readable up to the write pointer.
		 */
		if (zonefs_zone_is_cnv(z))
			return z->z_capacity;
		return (zone->wp - zone->start) << SECTOR_SHIFT;
	}
}
  197. /*
  198. * Check a zone condition and adjust its inode access permissions for
  199. * offline and readonly zones.
  200. */
  201. static void zonefs_inode_update_mode(struct inode *inode)
  202. {
  203. struct zonefs_zone *z = zonefs_inode_zone(inode);
  204. if (z->z_flags & ZONEFS_ZONE_OFFLINE) {
  205. /* Offline zones cannot be read nor written */
  206. inode->i_flags |= S_IMMUTABLE;
  207. inode->i_mode &= ~0777;
  208. } else if (z->z_flags & ZONEFS_ZONE_READONLY) {
  209. /* Readonly zones cannot be written */
  210. inode->i_flags |= S_IMMUTABLE;
  211. if (z->z_flags & ZONEFS_ZONE_INIT_MODE)
  212. inode->i_mode &= ~0777;
  213. else
  214. inode->i_mode &= ~0222;
  215. }
  216. z->z_flags &= ~ZONEFS_ZONE_INIT_MODE;
  217. }
/*
 * Context passed to zonefs_io_error_cb() through blkdev_report_zones().
 */
struct zonefs_ioerr_data {
	struct inode *inode;	/* Inode of the file backed by the zone checked */
	bool write;		/* True if the failed IO was a write */
};
/*
 * Zone report callback handling an IO error on the file backed by the
 * reported zone: check the zone condition, apply the errors= mount option
 * policy, and correct the inode size, write pointer offset and access
 * permissions accordingly.
 */
static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
			      void *data)
{
	struct zonefs_ioerr_data *err = data;
	struct inode *inode = err->inode;
	struct zonefs_zone *z = zonefs_inode_zone(inode);
	struct super_block *sb = inode->i_sb;
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	loff_t isize, data_size;

	/*
	 * Check the zone condition: if the zone is not "bad" (offline or
	 * read-only), read errors are simply signaled to the IO issuer as long
	 * as there is no inconsistency between the inode size and the amount of
	 * data written in the zone (data_size).
	 */
	data_size = zonefs_check_zone_condition(sb, z, zone);
	isize = i_size_read(inode);
	if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
	    !err->write && isize == data_size)
		return 0;

	/*
	 * At this point, we detected either a bad zone or an inconsistency
	 * between the inode size and the amount of data written in the zone.
	 * For the latter case, the cause may be a write IO error or an external
	 * action on the device. Two error patterns exist:
	 * 1) The inode size is lower than the amount of data in the zone:
	 *    a write operation partially failed and data was written at the end
	 *    of the file. This can happen in the case of a large direct IO
	 *    needing several BIOs and/or write requests to be processed.
	 * 2) The inode size is larger than the amount of data in the zone:
	 *    this can happen with a deferred write error with the use of the
	 *    device side write cache after getting successful write IO
	 *    completions. Other possibilities are (a) an external corruption,
	 *    e.g. an application reset the zone directly, or (b) the device
	 *    has a serious problem (e.g. firmware bug).
	 *
	 * In all cases, warn about inode size inconsistency and handle the
	 * IO error according to the zone condition and to the mount options.
	 */
	if (zonefs_zone_is_seq(z) && isize != data_size)
		zonefs_warn(sb,
			    "inode %lu: invalid size %lld (should be %lld)\n",
			    inode->i_ino, isize, data_size);

	/*
	 * First handle bad zones signaled by hardware. The mount options
	 * errors=zone-ro and errors=zone-offline result in changing the
	 * zone condition to read-only and offline respectively, as if the
	 * condition was signaled by the hardware.
	 */
	if ((z->z_flags & ZONEFS_ZONE_OFFLINE) ||
	    (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) {
		zonefs_warn(sb, "inode %lu: read/write access disabled\n",
			    inode->i_ino);
		if (!(z->z_flags & ZONEFS_ZONE_OFFLINE))
			z->z_flags |= ZONEFS_ZONE_OFFLINE;
		zonefs_inode_update_mode(inode);
		data_size = 0;
	} else if ((z->z_flags & ZONEFS_ZONE_READONLY) ||
		   (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) {
		zonefs_warn(sb, "inode %lu: write access disabled\n",
			    inode->i_ino);
		if (!(z->z_flags & ZONEFS_ZONE_READONLY))
			z->z_flags |= ZONEFS_ZONE_READONLY;
		zonefs_inode_update_mode(inode);
		data_size = isize;
	} else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO &&
		   data_size > isize) {
		/* Do not expose garbage data */
		data_size = isize;
	}

	/*
	 * If the filesystem is mounted with the explicit-open mount option, we
	 * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to
	 * the read-only or offline condition, to avoid attempting an explicit
	 * close of the zone when the inode file is closed.
	 */
	if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) &&
	    (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)))
		z->z_flags &= ~ZONEFS_ZONE_OPEN;

	/*
	 * If error=remount-ro was specified, any error result in remounting
	 * the volume as read-only.
	 */
	if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) {
		zonefs_warn(sb, "remounting filesystem read-only\n");
		sb->s_flags |= SB_RDONLY;
	}

	/*
	 * Update block usage stats and the inode size to prevent access to
	 * invalid data.
	 */
	zonefs_update_stats(inode, data_size);
	zonefs_i_size_write(inode, data_size);
	z->z_wpoffset = data_size;
	zonefs_inode_account_active(inode);

	return 0;
}
/*
 * When a file IO error occurs, check the file zone to see if there is a change
 * in the zone condition (e.g. offline or read-only). For a failed write to a
 * sequential zone, the zone write pointer position must also be checked to
 * eventually correct the file size and zonefs inode write pointer offset
 * (which can be out of sync with the drive due to partial write failures).
 */
void __zonefs_io_error(struct inode *inode, bool write)
{
	struct zonefs_zone *z = zonefs_inode_zone(inode);
	struct super_block *sb = inode->i_sb;
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	unsigned int noio_flag;
	unsigned int nr_zones = 1;
	struct zonefs_ioerr_data err = {
		.inode = inode,
		.write = write,
	};
	int ret;

	/*
	 * The only files that have more than one zone are conventional zone
	 * files with aggregated conventional zones, for which the inode zone
	 * size is always larger than the device zone size.
	 */
	if (z->z_size > bdev_zone_sectors(sb->s_bdev))
		nr_zones = z->z_size >>
			(sbi->s_zone_sectors_shift + SECTOR_SHIFT);

	/*
	 * Memory allocations in blkdev_report_zones() can trigger a memory
	 * reclaim which may in turn cause a recursion into zonefs as well as
	 * struct request allocations for the same device. The former case may
	 * end up in a deadlock on the inode truncate mutex, while the latter
	 * may prevent IO forward progress. Executing the report zones under
	 * the GFP_NOIO context avoids both problems.
	 */
	noio_flag = memalloc_noio_save();
	ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones,
				  zonefs_io_error_cb, &err);
	if (ret != nr_zones)
		zonefs_err(sb, "Get inode %lu zone information failed %d\n",
			   inode->i_ino, ret);
	memalloc_noio_restore(noio_flag);
}
  362. static struct kmem_cache *zonefs_inode_cachep;
  363. static struct inode *zonefs_alloc_inode(struct super_block *sb)
  364. {
  365. struct zonefs_inode_info *zi;
  366. zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL);
  367. if (!zi)
  368. return NULL;
  369. inode_init_once(&zi->i_vnode);
  370. mutex_init(&zi->i_truncate_mutex);
  371. zi->i_wr_refcnt = 0;
  372. return &zi->i_vnode;
  373. }
  374. static void zonefs_free_inode(struct inode *inode)
  375. {
  376. kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode));
  377. }
  378. /*
  379. * File system stat.
  380. */
  381. static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
  382. {
  383. struct super_block *sb = dentry->d_sb;
  384. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  385. enum zonefs_ztype t;
  386. buf->f_type = ZONEFS_MAGIC;
  387. buf->f_bsize = sb->s_blocksize;
  388. buf->f_namelen = ZONEFS_NAME_MAX;
  389. spin_lock(&sbi->s_lock);
  390. buf->f_blocks = sbi->s_blocks;
  391. if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks))
  392. buf->f_bfree = 0;
  393. else
  394. buf->f_bfree = buf->f_blocks - sbi->s_used_blocks;
  395. buf->f_bavail = buf->f_bfree;
  396. for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
  397. if (sbi->s_zgroup[t].g_nr_zones)
  398. buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1;
  399. }
  400. buf->f_ffree = 0;
  401. spin_unlock(&sbi->s_lock);
  402. buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b);
  403. return 0;
  404. }
/* Token values for the mount options recognized in zonefs_parse_options(). */
enum {
	Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair,
	Opt_explicit_open, Opt_err,
};
/* Mount option strings mapped to their token values. */
static const match_table_t tokens = {
	{ Opt_errors_ro, "errors=remount-ro"},
	{ Opt_errors_zro, "errors=zone-ro"},
	{ Opt_errors_zol, "errors=zone-offline"},
	{ Opt_errors_repair, "errors=repair"},
	{ Opt_explicit_open, "explicit-open" },
	{ Opt_err, NULL}
};
  417. static int zonefs_parse_options(struct super_block *sb, char *options)
  418. {
  419. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  420. substring_t args[MAX_OPT_ARGS];
  421. char *p;
  422. if (!options)
  423. return 0;
  424. while ((p = strsep(&options, ",")) != NULL) {
  425. int token;
  426. if (!*p)
  427. continue;
  428. token = match_token(p, tokens, args);
  429. switch (token) {
  430. case Opt_errors_ro:
  431. sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
  432. sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO;
  433. break;
  434. case Opt_errors_zro:
  435. sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
  436. sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO;
  437. break;
  438. case Opt_errors_zol:
  439. sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
  440. sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL;
  441. break;
  442. case Opt_errors_repair:
  443. sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
  444. sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR;
  445. break;
  446. case Opt_explicit_open:
  447. sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
  448. break;
  449. default:
  450. return -EINVAL;
  451. }
  452. }
  453. return 0;
  454. }
  455. static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
  456. {
  457. struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb);
  458. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO)
  459. seq_puts(seq, ",errors=remount-ro");
  460. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)
  461. seq_puts(seq, ",errors=zone-ro");
  462. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)
  463. seq_puts(seq, ",errors=zone-offline");
  464. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
  465. seq_puts(seq, ",errors=repair");
  466. return 0;
  467. }
  468. static int zonefs_remount(struct super_block *sb, int *flags, char *data)
  469. {
  470. sync_filesystem(sb);
  471. return zonefs_parse_options(sb, data);
  472. }
/*
 * Change inode attributes. Immutable inodes (offline or read-only zones)
 * cannot be changed, directory modes cannot gain write bits, and size
 * changes go through zonefs_file_truncate().
 *
 * NOTE(review): setattr_prepare() and setattr_copy() are called with
 * &init_user_ns while dquot_transfer() uses the mnt_userns argument —
 * presumably because zonefs does not support idmapped mounts; confirm
 * before changing either.
 */
static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
				struct dentry *dentry, struct iattr *iattr)
{
	struct inode *inode = d_inode(dentry);
	int ret;

	/* Offline/read-only zone inodes are immutable and cannot change. */
	if (unlikely(IS_IMMUTABLE(inode)))
		return -EPERM;

	ret = setattr_prepare(&init_user_ns, dentry, iattr);
	if (ret)
		return ret;

	/*
	 * Since files and directories cannot be created nor deleted, do not
	 * allow setting any write attributes on the sub-directories grouping
	 * files by zone type.
	 */
	if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) &&
	    (iattr->ia_mode & 0222))
		return -EPERM;

	/* Transfer quota accounting on ownership changes. */
	if (((iattr->ia_valid & ATTR_UID) &&
	     !uid_eq(iattr->ia_uid, inode->i_uid)) ||
	    ((iattr->ia_valid & ATTR_GID) &&
	     !gid_eq(iattr->ia_gid, inode->i_gid))) {
		ret = dquot_transfer(mnt_userns, inode, iattr);
		if (ret)
			return ret;
	}

	if (iattr->ia_valid & ATTR_SIZE) {
		ret = zonefs_file_truncate(inode, iattr->ia_size);
		if (ret)
			return ret;
	}

	setattr_copy(&init_user_ns, inode, iattr);

	return 0;
}
/*
 * Inode operations for the zone group directories: lookup only, with
 * attribute changes restricted by zonefs_inode_setattr().
 */
static const struct inode_operations zonefs_dir_inode_operations = {
	.lookup		= simple_lookup,
	.setattr	= zonefs_inode_setattr,
};
  511. static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
  512. enum zonefs_ztype ztype)
  513. {
  514. struct super_block *sb = parent->i_sb;
  515. inode->i_ino = bdev_nr_zones(sb->s_bdev) + ztype + 1;
  516. inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555);
  517. inode->i_op = &zonefs_dir_inode_operations;
  518. inode->i_fop = &simple_dir_operations;
  519. set_nlink(inode, 2);
  520. inc_nlink(parent);
  521. }
/*
 * Inode operations for zone files: only attribute changes, restricted by
 * zonefs_inode_setattr().
 */
static const struct inode_operations zonefs_file_inode_operations = {
	.setattr	= zonefs_inode_setattr,
};
  525. static void zonefs_init_file_inode(struct inode *inode,
  526. struct zonefs_zone *z)
  527. {
  528. struct super_block *sb = inode->i_sb;
  529. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  530. inode->i_private = z;
  531. inode->i_ino = z->z_sector >> sbi->s_zone_sectors_shift;
  532. inode->i_mode = S_IFREG | sbi->s_perm;
  533. inode->i_uid = sbi->s_uid;
  534. inode->i_gid = sbi->s_gid;
  535. inode->i_size = z->z_wpoffset;
  536. inode->i_blocks = z->z_capacity >> SECTOR_SHIFT;
  537. inode->i_op = &zonefs_file_inode_operations;
  538. inode->i_fop = &zonefs_file_operations;
  539. inode->i_mapping->a_ops = &zonefs_file_aops;
  540. /* Update the inode access rights depending on the zone condition */
  541. z->z_flags |= ZONEFS_ZONE_INIT_MODE;
  542. zonefs_inode_update_mode(inode);
  543. }
/*
 * Create a new inode and its dentry under @parent: a zone file inode when
 * @z is non-NULL, a zone group directory inode otherwise.
 * Returns the new dentry or an ERR_PTR on allocation failure.
 */
static struct dentry *zonefs_create_inode(struct dentry *parent,
					  const char *name,
					  struct zonefs_zone *z,
					  enum zonefs_ztype ztype)
{
	struct inode *dir = d_inode(parent);
	struct dentry *dentry;
	struct inode *inode;
	int ret = -ENOMEM;

	dentry = d_alloc_name(parent, name);
	if (!dentry)
		return ERR_PTR(ret);

	inode = new_inode(parent->d_sb);
	if (!inode)
		goto dput;

	/* New inodes inherit the parent directory timestamps. */
	inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
	if (z)
		zonefs_init_file_inode(inode, z);
	else
		zonefs_init_dir_inode(dir, inode, ztype);

	/* d_add() consumes the inode reference. */
	d_add(dentry, inode);
	dir->i_size++;

	return dentry;

dput:
	dput(dentry);

	return ERR_PTR(ret);
}
/*
 * Temporary zone information gathered at mount time and used to build the
 * zone groups.
 */
struct zonefs_zone_data {
	struct super_block *sb;	/* Superblock being initialized */
	unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; /* Zone count per zone type */
	sector_t cnv_zone_start; /* Expected start of the next contiguous
				  * conventional zone (aggregation check) */
	struct blk_zone *zones;	/* Copy of the device zone descriptors */
};
  577. /*
  578. * Create the inodes for a zone group.
  579. */
  580. static int zonefs_create_zgroup_inodes(struct super_block *sb,
  581. enum zonefs_ztype ztype)
  582. {
  583. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  584. struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype];
  585. struct dentry *dir, *dent;
  586. char *file_name;
  587. int i, ret = 0;
  588. if (!zgroup)
  589. return -ENOMEM;
  590. /* If the group is empty, there is nothing to do */
  591. if (!zgroup->g_nr_zones)
  592. return 0;
  593. file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL);
  594. if (!file_name)
  595. return -ENOMEM;
  596. dir = zonefs_create_inode(sb->s_root, zonefs_zgroup_name(ztype),
  597. NULL, ztype);
  598. if (IS_ERR(dir)) {
  599. ret = PTR_ERR(dir);
  600. goto free;
  601. }
  602. for (i = 0; i < zgroup->g_nr_zones; i++) {
  603. /* Use the zone number within its group as the file name */
  604. snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", i);
  605. dent = zonefs_create_inode(dir, file_name,
  606. &zgroup->g_zones[i], ztype);
  607. if (IS_ERR(dent)) {
  608. ret = PTR_ERR(dent);
  609. break;
  610. }
  611. }
  612. free:
  613. kfree(file_name);
  614. return ret;
  615. }
/*
 * Zone report callback: count, per zone type, the number of files that
 * will be exposed, and save a copy of each zone descriptor for later
 * zone group initialization.
 */
static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx,
				   void *data)
{
	struct zonefs_zone_data *zd = data;
	struct super_block *sb = zd->sb;
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);

	/*
	 * We do not care about the first zone: it contains the super block
	 * and is not exposed as a file.
	 */
	if (!idx)
		return 0;

	/*
	 * Count the number of zones that will be exposed as files.
	 * For sequential zones, we always have as many files as zones.
	 * For conventional zones, the number of files depends on whether
	 * conventional zones aggregation is enabled.
	 */
	switch (zone->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (sbi->s_features & ZONEFS_F_AGGRCNV) {
			/* One file per set of contiguous conventional zones */
			if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) ||
			    zone->start != zd->cnv_zone_start)
				sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++;
			/* Remember where a contiguous zone would start. */
			zd->cnv_zone_start = zone->start + zone->len;
		} else {
			/* One file per zone */
			sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++;
		}
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++;
		break;
	default:
		zonefs_err(zd->sb, "Unsupported zone type 0x%x\n",
			   zone->type);
		return -EIO;
	}

	/* Keep a copy of the zone descriptor for later use. */
	memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone));

	return 0;
}
  659. static int zonefs_get_zone_info(struct zonefs_zone_data *zd)
  660. {
  661. struct block_device *bdev = zd->sb->s_bdev;
  662. int ret;
  663. zd->zones = kvcalloc(bdev_nr_zones(bdev), sizeof(struct blk_zone),
  664. GFP_KERNEL);
  665. if (!zd->zones)
  666. return -ENOMEM;
  667. /* Get zones information from the device */
  668. ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
  669. zonefs_get_zone_info_cb, zd);
  670. if (ret < 0) {
  671. zonefs_err(zd->sb, "Zone report failed %d\n", ret);
  672. return ret;
  673. }
  674. if (ret != bdev_nr_zones(bdev)) {
  675. zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n",
  676. ret, bdev_nr_zones(bdev));
  677. return -EIO;
  678. }
  679. return 0;
  680. }
  681. static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd)
  682. {
  683. kvfree(zd->zones);
  684. }
/*
 * Create a zone group and populate it with zone files.
 */
static int zonefs_init_zgroup(struct super_block *sb,
			      struct zonefs_zone_data *zd,
			      enum zonefs_ztype ztype)
{
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype];
	struct blk_zone *zone, *next, *end;
	struct zonefs_zone *z;
	unsigned int n = 0;
	int ret;

	/* Allocate the zone group. If it is empty, we have nothing to do. */
	if (!zgroup->g_nr_zones)
		return 0;

	zgroup->g_zones = kvcalloc(zgroup->g_nr_zones,
				   sizeof(struct zonefs_zone), GFP_KERNEL);
	if (!zgroup->g_zones)
		return -ENOMEM;

	/*
	 * Initialize the zone groups using the device zone information.
	 * We always skip the first zone as it contains the super block
	 * and is not used to back a file.
	 */
	end = zd->zones + bdev_nr_zones(sb->s_bdev);
	for (zone = &zd->zones[1]; zone < end; zone = next) {
		next = zone + 1;
		if (zonefs_zone_type(zone) != ztype)
			continue;

		/* Sanity check: never write past the allocated group array. */
		if (WARN_ON_ONCE(n >= zgroup->g_nr_zones))
			return -EINVAL;

		/*
		 * For conventional zones, contiguous zones can be aggregated
		 * together to form larger files. Note that this overwrites the
		 * length of the first zone of the set of contiguous zones
		 * aggregated together. If one offline or read-only zone is
		 * found, assume that all zones aggregated have the same
		 * condition.
		 */
		if (ztype == ZONEFS_ZTYPE_CNV &&
		    (sbi->s_features & ZONEFS_F_AGGRCNV)) {
			for (; next < end; next++) {
				if (zonefs_zone_type(next) != ztype)
					break;
				zone->len += next->len;
				zone->capacity += next->capacity;
				if (next->cond == BLK_ZONE_COND_READONLY &&
				    zone->cond != BLK_ZONE_COND_OFFLINE)
					zone->cond = BLK_ZONE_COND_READONLY;
				else if (next->cond == BLK_ZONE_COND_OFFLINE)
					zone->cond = BLK_ZONE_COND_OFFLINE;
			}
		}

		z = &zgroup->g_zones[n];
		if (ztype == ZONEFS_ZTYPE_CNV)
			z->z_flags |= ZONEFS_ZONE_CNV;
		z->z_sector = zone->start;
		z->z_size = zone->len << SECTOR_SHIFT;
		/* Without aggregation, all zones must have the device zone size. */
		if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
		    !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
			zonefs_err(sb,
				"Invalid zone size %llu (device zone sectors %llu)\n",
				z->z_size,
				bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
			return -EINVAL;
		}

		z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE,
				      zone->capacity << SECTOR_SHIFT);
		z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone);

		/* Account the zone in the superblock-wide statistics. */
		sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes);
		sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits;
		sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits;

		/*
		 * For sequential zones, make sure that any open zone is closed
		 * first to ensure that the initial number of open zones is 0,
		 * in sync with the open zone accounting done when the mount
		 * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
		 */
		if (ztype == ZONEFS_ZTYPE_SEQ &&
		    (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
		     zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
			ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE);
			if (ret)
				return ret;
		}

		zonefs_account_active(sb, z);

		n++;
	}

	/* All counted zones must have been initialized. */
	if (WARN_ON_ONCE(n != zgroup->g_nr_zones))
		return -EINVAL;

	zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
		    zonefs_zgroup_name(ztype),
		    zgroup->g_nr_zones,
		    zgroup->g_nr_zones > 1 ? "s" : "");

	return 0;
}
  782. static void zonefs_free_zgroups(struct super_block *sb)
  783. {
  784. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  785. enum zonefs_ztype ztype;
  786. if (!sbi)
  787. return;
  788. for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
  789. kvfree(sbi->s_zgroup[ztype].g_zones);
  790. sbi->s_zgroup[ztype].g_zones = NULL;
  791. }
  792. }
  793. /*
  794. * Create a zone group and populate it with zone files.
  795. */
  796. static int zonefs_init_zgroups(struct super_block *sb)
  797. {
  798. struct zonefs_zone_data zd;
  799. enum zonefs_ztype ztype;
  800. int ret;
  801. /* First get the device zone information */
  802. memset(&zd, 0, sizeof(struct zonefs_zone_data));
  803. zd.sb = sb;
  804. ret = zonefs_get_zone_info(&zd);
  805. if (ret)
  806. goto cleanup;
  807. /* Allocate and initialize the zone groups */
  808. for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
  809. ret = zonefs_init_zgroup(sb, &zd, ztype);
  810. if (ret) {
  811. zonefs_info(sb,
  812. "Zone group \"%s\" initialization failed\n",
  813. zonefs_zgroup_name(ztype));
  814. break;
  815. }
  816. }
  817. cleanup:
  818. zonefs_free_zone_info(&zd);
  819. if (ret)
  820. zonefs_free_zgroups(sb);
  821. return ret;
  822. }
  823. /*
  824. * Read super block information from the device.
  825. */
  826. static int zonefs_read_super(struct super_block *sb)
  827. {
  828. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  829. struct zonefs_super *super;
  830. u32 crc, stored_crc;
  831. struct page *page;
  832. struct bio_vec bio_vec;
  833. struct bio bio;
  834. int ret;
  835. page = alloc_page(GFP_KERNEL);
  836. if (!page)
  837. return -ENOMEM;
  838. bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ);
  839. bio.bi_iter.bi_sector = 0;
  840. bio_add_page(&bio, page, PAGE_SIZE, 0);
  841. ret = submit_bio_wait(&bio);
  842. if (ret)
  843. goto free_page;
  844. super = page_address(page);
  845. ret = -EINVAL;
  846. if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC)
  847. goto free_page;
  848. stored_crc = le32_to_cpu(super->s_crc);
  849. super->s_crc = 0;
  850. crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super));
  851. if (crc != stored_crc) {
  852. zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)",
  853. crc, stored_crc);
  854. goto free_page;
  855. }
  856. sbi->s_features = le64_to_cpu(super->s_features);
  857. if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) {
  858. zonefs_err(sb, "Unknown features set 0x%llx\n",
  859. sbi->s_features);
  860. goto free_page;
  861. }
  862. if (sbi->s_features & ZONEFS_F_UID) {
  863. sbi->s_uid = make_kuid(current_user_ns(),
  864. le32_to_cpu(super->s_uid));
  865. if (!uid_valid(sbi->s_uid)) {
  866. zonefs_err(sb, "Invalid UID feature\n");
  867. goto free_page;
  868. }
  869. }
  870. if (sbi->s_features & ZONEFS_F_GID) {
  871. sbi->s_gid = make_kgid(current_user_ns(),
  872. le32_to_cpu(super->s_gid));
  873. if (!gid_valid(sbi->s_gid)) {
  874. zonefs_err(sb, "Invalid GID feature\n");
  875. goto free_page;
  876. }
  877. }
  878. if (sbi->s_features & ZONEFS_F_PERM)
  879. sbi->s_perm = le32_to_cpu(super->s_perm);
  880. if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) {
  881. zonefs_err(sb, "Reserved area is being used\n");
  882. goto free_page;
  883. }
  884. import_uuid(&sbi->s_uuid, super->s_uuid);
  885. ret = 0;
  886. free_page:
  887. __free_page(page);
  888. return ret;
  889. }
/* Super block operations: zonefs only supports statfs, remount and
 * option display on top of the generic inode allocation hooks. */
static const struct super_operations zonefs_sops = {
	.alloc_inode	= zonefs_alloc_inode,
	.free_inode	= zonefs_free_inode,
	.statfs		= zonefs_statfs,
	.remount_fs	= zonefs_remount,
	.show_options	= zonefs_show_options,
};
/*
 * Check that the device is zoned. If it is, get the list of zones and create
 * sub-directories and files according to the device zone configuration and
 * format options.
 */
static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct zonefs_sb_info *sbi;
	struct inode *inode;
	enum zonefs_ztype t;
	int ret;

	/* zonefs only makes sense on zoned block devices. */
	if (!bdev_is_zoned(sb->s_bdev)) {
		zonefs_err(sb, "Not a zoned block device\n");
		return -EINVAL;
	}

	/*
	 * Initialize super block information: the maximum file size is updated
	 * when the zone files are created so that the format option
	 * ZONEFS_F_AGGRCNV which increases the maximum file size of a file
	 * beyond the zone size is taken into account.
	 */
	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;

	spin_lock_init(&sbi->s_lock);
	sb->s_fs_info = sbi;
	sb->s_magic = ZONEFS_MAGIC;
	sb->s_maxbytes = 0;
	sb->s_op = &zonefs_sops;
	sb->s_time_gran	= 1;

	/*
	 * The block size is set to the device zone write granularity to ensure
	 * that write operations are always aligned according to the device
	 * interface constraints.
	 */
	sb_set_blocksize(sb, bdev_zone_write_granularity(sb->s_bdev));
	sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev));
	/* Defaults, possibly overridden by the on-disk super block features. */
	sbi->s_uid = GLOBAL_ROOT_UID;
	sbi->s_gid = GLOBAL_ROOT_GID;
	sbi->s_perm = 0640;
	sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;

	/* Open/active zone accounting, bounded by the device limits. */
	atomic_set(&sbi->s_wro_seq_files, 0);
	sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev);
	atomic_set(&sbi->s_active_seq_files, 0);
	sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev);

	/* Read the on-disk super block before parsing mount options so
	 * that options can override the format defaults. */
	ret = zonefs_read_super(sb);
	if (ret)
		return ret;

	ret = zonefs_parse_options(sb, data);
	if (ret)
		return ret;

	zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev));

	/* explicit_open is pointless when the device has no zone limits. */
	if (!sbi->s_max_wro_seq_files &&
	    !sbi->s_max_active_seq_files &&
	    sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
		zonefs_info(sb,
			"No open and active zone limits. Ignoring explicit_open mount option\n");
		sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
	}

	/* Initialize the zone groups */
	ret = zonefs_init_zgroups(sb);
	if (ret)
		goto cleanup;

	/* Create root directory inode */
	ret = -ENOMEM;
	inode = new_inode(sb);
	if (!inode)
		goto cleanup;

	/* The root inode number is the total zone count, so that zone
	 * file inode numbers can directly map to zone indexes. */
	inode->i_ino = bdev_nr_zones(sb->s_bdev);
	inode->i_mode = S_IFDIR | 0555;
	inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode);
	inode->i_op = &zonefs_dir_inode_operations;
	inode->i_fop = &simple_dir_operations;
	set_nlink(inode, 2);

	/* d_make_root() consumes the inode reference even on failure. */
	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		goto cleanup;

	/* Create and populate files in zone groups directories */
	for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
		ret = zonefs_create_zgroup_inodes(sb, t);
		if (ret)
			goto cleanup;
	}

	ret = zonefs_sysfs_register(sb);
	if (ret)
		goto cleanup;

	return 0;

cleanup:
	/* sbi itself is freed by zonefs_kill_super() on mount failure. */
	zonefs_free_zgroups(sb);

	return ret;
}
/* Mount entry point: zonefs is backed by a block device. */
static struct dentry *zonefs_mount(struct file_system_type *fs_type,
				   int flags, const char *dev_name, void *data)
{
	return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super);
}
/*
 * Unmount: tear down the dentry tree, sysfs entries and zone groups
 * before releasing the block device, then free the sb information.
 * Also runs on mount failure, where sb->s_root may still be NULL.
 */
static void zonefs_kill_super(struct super_block *sb)
{
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);

	/* Drop all dentries so the zone file inodes can be evicted. */
	if (sb->s_root)
		d_genocide(sb->s_root);

	zonefs_sysfs_unregister(sb);
	zonefs_free_zgroups(sb);
	/* kill_block_super() must run before sbi is freed: inode
	 * eviction during it may still dereference sb->s_fs_info. */
	kill_block_super(sb);
	kfree(sbi);
}
/*
 * File system definition and registration.
 */
static struct file_system_type zonefs_type = {
	.owner		= THIS_MODULE,
	.name		= "zonefs",
	.mount		= zonefs_mount,
	.kill_sb	= zonefs_kill_super,
	/* FS_REQUIRES_DEV: mounting needs a block device argument. */
	.fs_flags	= FS_REQUIRES_DEV,
};
  1013. static int __init zonefs_init_inodecache(void)
  1014. {
  1015. zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache",
  1016. sizeof(struct zonefs_inode_info), 0,
  1017. (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT),
  1018. NULL);
  1019. if (zonefs_inode_cachep == NULL)
  1020. return -ENOMEM;
  1021. return 0;
  1022. }
/* Destroy the inode slab cache created by zonefs_init_inodecache(). */
static void zonefs_destroy_inodecache(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy the inode cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(zonefs_inode_cachep);
}
  1032. static int __init zonefs_init(void)
  1033. {
  1034. int ret;
  1035. BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE);
  1036. ret = zonefs_init_inodecache();
  1037. if (ret)
  1038. return ret;
  1039. ret = zonefs_sysfs_init();
  1040. if (ret)
  1041. goto destroy_inodecache;
  1042. ret = register_filesystem(&zonefs_type);
  1043. if (ret)
  1044. goto sysfs_exit;
  1045. return 0;
  1046. sysfs_exit:
  1047. zonefs_sysfs_exit();
  1048. destroy_inodecache:
  1049. zonefs_destroy_inodecache();
  1050. return ret;
  1051. }
/* Module exit: undo zonefs_init() in reverse order. */
static void __exit zonefs_exit(void)
{
	unregister_filesystem(&zonefs_type);
	zonefs_sysfs_exit();
	zonefs_destroy_inodecache();
}
/* Module metadata and entry points. */
MODULE_AUTHOR("Damien Le Moal");
MODULE_DESCRIPTION("Zone file system for zoned block devices");
MODULE_LICENSE("GPL");
/* Allow automatic module loading on mount -t zonefs. */
MODULE_ALIAS_FS("zonefs");
module_init(zonefs_init);
module_exit(zonefs_exit);