export.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. // SPDX-License-Identifier: GPL-2.0
  #include <linux/ceph/ceph_debug.h>
  #include <linux/exportfs.h>
  #include <linux/kernel.h>
  #include <linux/limits.h>
  #include <linux/slab.h>
  #include <asm/unaligned.h>
  #include "super.h"
  #include "mds_client.h"
  8. /*
  9. * Basic fh
  10. */
  11. struct ceph_nfs_fh {
  12. u64 ino;
  13. } __attribute__ ((packed));
  14. /*
  15. * Larger fh that includes parent ino.
  16. */
  17. struct ceph_nfs_confh {
  18. u64 ino, parent_ino;
  19. } __attribute__ ((packed));
  20. /*
  21. * fh for snapped inode
  22. */
  23. struct ceph_nfs_snapfh {
  24. u64 ino;
  25. u64 snapid;
  26. u64 parent_ino;
  27. u32 hash;
  28. } __attribute__ ((packed));
  29. static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
  30. struct inode *parent_inode)
  31. {
  32. static const int snap_handle_length =
  33. sizeof(struct ceph_nfs_snapfh) >> 2;
  34. struct ceph_nfs_snapfh *sfh = (void *)rawfh;
  35. u64 snapid = ceph_snap(inode);
  36. int ret;
  37. bool no_parent = true;
  38. if (*max_len < snap_handle_length) {
  39. *max_len = snap_handle_length;
  40. ret = FILEID_INVALID;
  41. goto out;
  42. }
  43. ret = -EINVAL;
  44. if (snapid != CEPH_SNAPDIR) {
  45. struct inode *dir;
  46. struct dentry *dentry = d_find_alias(inode);
  47. if (!dentry)
  48. goto out;
  49. rcu_read_lock();
  50. dir = d_inode_rcu(dentry->d_parent);
  51. if (ceph_snap(dir) != CEPH_SNAPDIR) {
  52. sfh->parent_ino = ceph_ino(dir);
  53. sfh->hash = ceph_dentry_hash(dir, dentry);
  54. no_parent = false;
  55. }
  56. rcu_read_unlock();
  57. dput(dentry);
  58. }
  59. if (no_parent) {
  60. if (!S_ISDIR(inode->i_mode))
  61. goto out;
  62. sfh->parent_ino = sfh->ino;
  63. sfh->hash = 0;
  64. }
  65. sfh->ino = ceph_ino(inode);
  66. sfh->snapid = snapid;
  67. *max_len = snap_handle_length;
  68. ret = FILEID_BTRFS_WITH_PARENT;
  69. out:
  70. dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret);
  71. return ret;
  72. }
  73. static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
  74. struct inode *parent_inode)
  75. {
  76. static const int handle_length =
  77. sizeof(struct ceph_nfs_fh) >> 2;
  78. static const int connected_handle_length =
  79. sizeof(struct ceph_nfs_confh) >> 2;
  80. int type;
  81. if (ceph_snap(inode) != CEPH_NOSNAP)
  82. return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
  83. if (parent_inode && (*max_len < connected_handle_length)) {
  84. *max_len = connected_handle_length;
  85. return FILEID_INVALID;
  86. } else if (*max_len < handle_length) {
  87. *max_len = handle_length;
  88. return FILEID_INVALID;
  89. }
  90. if (parent_inode) {
  91. struct ceph_nfs_confh *cfh = (void *)rawfh;
  92. dout("encode_fh %llx with parent %llx\n",
  93. ceph_ino(inode), ceph_ino(parent_inode));
  94. cfh->ino = ceph_ino(inode);
  95. cfh->parent_ino = ceph_ino(parent_inode);
  96. *max_len = connected_handle_length;
  97. type = FILEID_INO32_GEN_PARENT;
  98. } else {
  99. struct ceph_nfs_fh *fh = (void *)rawfh;
  100. dout("encode_fh %llx\n", ceph_ino(inode));
  101. fh->ino = ceph_ino(inode);
  102. *max_len = handle_length;
  103. type = FILEID_INO32_GEN;
  104. }
  105. return type;
  106. }
  107. static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
  108. {
  109. struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
  110. struct inode *inode;
  111. struct ceph_vino vino;
  112. int err;
  113. vino.ino = ino;
  114. vino.snap = CEPH_NOSNAP;
  115. if (ceph_vino_is_reserved(vino))
  116. return ERR_PTR(-ESTALE);
  117. inode = ceph_find_inode(sb, vino);
  118. if (!inode) {
  119. struct ceph_mds_request *req;
  120. int mask;
  121. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
  122. USE_ANY_MDS);
  123. if (IS_ERR(req))
  124. return ERR_CAST(req);
  125. mask = CEPH_STAT_CAP_INODE;
  126. if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
  127. mask |= CEPH_CAP_XATTR_SHARED;
  128. req->r_args.lookupino.mask = cpu_to_le32(mask);
  129. req->r_ino1 = vino;
  130. req->r_num_caps = 1;
  131. err = ceph_mdsc_do_request(mdsc, NULL, req);
  132. inode = req->r_target_inode;
  133. if (inode)
  134. ihold(inode);
  135. ceph_mdsc_put_request(req);
  136. if (!inode)
  137. return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
  138. } else {
  139. if (ceph_inode_is_shutdown(inode)) {
  140. iput(inode);
  141. return ERR_PTR(-ESTALE);
  142. }
  143. }
  144. return inode;
  145. }
  146. struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
  147. {
  148. struct inode *inode = __lookup_inode(sb, ino);
  149. if (IS_ERR(inode))
  150. return inode;
  151. if (inode->i_nlink == 0) {
  152. iput(inode);
  153. return ERR_PTR(-ESTALE);
  154. }
  155. return inode;
  156. }
  157. static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
  158. {
  159. struct inode *inode = __lookup_inode(sb, ino);
  160. struct ceph_inode_info *ci = ceph_inode(inode);
  161. int err;
  162. if (IS_ERR(inode))
  163. return ERR_CAST(inode);
  164. /* We need LINK caps to reliably check i_nlink */
  165. err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
  166. if (err) {
  167. iput(inode);
  168. return ERR_PTR(err);
  169. }
  170. /* -ESTALE if inode as been unlinked and no file is open */
  171. if ((inode->i_nlink == 0) && !__ceph_is_file_opened(ci)) {
  172. iput(inode);
  173. return ERR_PTR(-ESTALE);
  174. }
  175. return d_obtain_alias(inode);
  176. }
  177. static struct dentry *__snapfh_to_dentry(struct super_block *sb,
  178. struct ceph_nfs_snapfh *sfh,
  179. bool want_parent)
  180. {
  181. struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
  182. struct ceph_mds_request *req;
  183. struct inode *inode;
  184. struct ceph_vino vino;
  185. int mask;
  186. int err;
  187. bool unlinked = false;
  188. if (want_parent) {
  189. vino.ino = sfh->parent_ino;
  190. if (sfh->snapid == CEPH_SNAPDIR)
  191. vino.snap = CEPH_NOSNAP;
  192. else if (sfh->ino == sfh->parent_ino)
  193. vino.snap = CEPH_SNAPDIR;
  194. else
  195. vino.snap = sfh->snapid;
  196. } else {
  197. vino.ino = sfh->ino;
  198. vino.snap = sfh->snapid;
  199. }
  200. if (ceph_vino_is_reserved(vino))
  201. return ERR_PTR(-ESTALE);
  202. inode = ceph_find_inode(sb, vino);
  203. if (inode) {
  204. if (ceph_inode_is_shutdown(inode)) {
  205. iput(inode);
  206. return ERR_PTR(-ESTALE);
  207. }
  208. return d_obtain_alias(inode);
  209. }
  210. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
  211. USE_ANY_MDS);
  212. if (IS_ERR(req))
  213. return ERR_CAST(req);
  214. mask = CEPH_STAT_CAP_INODE;
  215. if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
  216. mask |= CEPH_CAP_XATTR_SHARED;
  217. req->r_args.lookupino.mask = cpu_to_le32(mask);
  218. if (vino.snap < CEPH_NOSNAP) {
  219. req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
  220. if (!want_parent && sfh->ino != sfh->parent_ino) {
  221. req->r_args.lookupino.parent =
  222. cpu_to_le64(sfh->parent_ino);
  223. req->r_args.lookupino.hash =
  224. cpu_to_le32(sfh->hash);
  225. }
  226. }
  227. req->r_ino1 = vino;
  228. req->r_num_caps = 1;
  229. err = ceph_mdsc_do_request(mdsc, NULL, req);
  230. inode = req->r_target_inode;
  231. if (inode) {
  232. if (vino.snap == CEPH_SNAPDIR) {
  233. if (inode->i_nlink == 0)
  234. unlinked = true;
  235. inode = ceph_get_snapdir(inode);
  236. } else if (ceph_snap(inode) == vino.snap) {
  237. ihold(inode);
  238. } else {
  239. /* mds does not support lookup snapped inode */
  240. inode = ERR_PTR(-EOPNOTSUPP);
  241. }
  242. } else {
  243. inode = ERR_PTR(-ESTALE);
  244. }
  245. ceph_mdsc_put_request(req);
  246. if (want_parent) {
  247. dout("snapfh_to_parent %llx.%llx\n err=%d\n",
  248. vino.ino, vino.snap, err);
  249. } else {
  250. dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d",
  251. vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err);
  252. }
  253. if (IS_ERR(inode))
  254. return ERR_CAST(inode);
  255. /* see comments in ceph_get_parent() */
  256. return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
  257. }
  258. /*
  259. * convert regular fh to dentry
  260. */
  261. static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
  262. struct fid *fid,
  263. int fh_len, int fh_type)
  264. {
  265. struct ceph_nfs_fh *fh = (void *)fid->raw;
  266. if (fh_type == FILEID_BTRFS_WITH_PARENT) {
  267. struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
  268. return __snapfh_to_dentry(sb, sfh, false);
  269. }
  270. if (fh_type != FILEID_INO32_GEN &&
  271. fh_type != FILEID_INO32_GEN_PARENT)
  272. return NULL;
  273. if (fh_len < sizeof(*fh) / 4)
  274. return NULL;
  275. dout("fh_to_dentry %llx\n", fh->ino);
  276. return __fh_to_dentry(sb, fh->ino);
  277. }
  278. static struct dentry *__get_parent(struct super_block *sb,
  279. struct dentry *child, u64 ino)
  280. {
  281. struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
  282. struct ceph_mds_request *req;
  283. struct inode *inode;
  284. int mask;
  285. int err;
  286. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
  287. USE_ANY_MDS);
  288. if (IS_ERR(req))
  289. return ERR_CAST(req);
  290. if (child) {
  291. req->r_inode = d_inode(child);
  292. ihold(d_inode(child));
  293. } else {
  294. req->r_ino1 = (struct ceph_vino) {
  295. .ino = ino,
  296. .snap = CEPH_NOSNAP,
  297. };
  298. }
  299. mask = CEPH_STAT_CAP_INODE;
  300. if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
  301. mask |= CEPH_CAP_XATTR_SHARED;
  302. req->r_args.getattr.mask = cpu_to_le32(mask);
  303. req->r_num_caps = 1;
  304. err = ceph_mdsc_do_request(mdsc, NULL, req);
  305. if (err) {
  306. ceph_mdsc_put_request(req);
  307. return ERR_PTR(err);
  308. }
  309. inode = req->r_target_inode;
  310. if (inode)
  311. ihold(inode);
  312. ceph_mdsc_put_request(req);
  313. if (!inode)
  314. return ERR_PTR(-ENOENT);
  315. return d_obtain_alias(inode);
  316. }
  317. static struct dentry *ceph_get_parent(struct dentry *child)
  318. {
  319. struct inode *inode = d_inode(child);
  320. struct dentry *dn;
  321. if (ceph_snap(inode) != CEPH_NOSNAP) {
  322. struct inode* dir;
  323. bool unlinked = false;
  324. /* do not support non-directory */
  325. if (!d_is_dir(child)) {
  326. dn = ERR_PTR(-EINVAL);
  327. goto out;
  328. }
  329. dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
  330. if (IS_ERR(dir)) {
  331. dn = ERR_CAST(dir);
  332. goto out;
  333. }
  334. /* There can be multiple paths to access snapped inode.
  335. * For simplicity, treat snapdir of head inode as parent */
  336. if (ceph_snap(inode) != CEPH_SNAPDIR) {
  337. struct inode *snapdir = ceph_get_snapdir(dir);
  338. if (dir->i_nlink == 0)
  339. unlinked = true;
  340. iput(dir);
  341. if (IS_ERR(snapdir)) {
  342. dn = ERR_CAST(snapdir);
  343. goto out;
  344. }
  345. dir = snapdir;
  346. }
  347. /* If directory has already been deleted, futher get_parent
  348. * will fail. Do not mark snapdir dentry as disconnected,
  349. * this prevent exportfs from doing futher get_parent. */
  350. if (unlinked)
  351. dn = d_obtain_root(dir);
  352. else
  353. dn = d_obtain_alias(dir);
  354. } else {
  355. dn = __get_parent(child->d_sb, child, 0);
  356. }
  357. out:
  358. dout("get_parent %p ino %llx.%llx err=%ld\n",
  359. child, ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn));
  360. return dn;
  361. }
  362. /*
  363. * convert regular fh to parent
  364. */
  365. static struct dentry *ceph_fh_to_parent(struct super_block *sb,
  366. struct fid *fid,
  367. int fh_len, int fh_type)
  368. {
  369. struct ceph_nfs_confh *cfh = (void *)fid->raw;
  370. struct dentry *dentry;
  371. if (fh_type == FILEID_BTRFS_WITH_PARENT) {
  372. struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
  373. return __snapfh_to_dentry(sb, sfh, true);
  374. }
  375. if (fh_type != FILEID_INO32_GEN_PARENT)
  376. return NULL;
  377. if (fh_len < sizeof(*cfh) / 4)
  378. return NULL;
  379. dout("fh_to_parent %llx\n", cfh->parent_ino);
  380. dentry = __get_parent(sb, NULL, cfh->ino);
  381. if (unlikely(dentry == ERR_PTR(-ENOENT)))
  382. dentry = __fh_to_dentry(sb, cfh->parent_ino);
  383. return dentry;
  384. }
  385. static int __get_snap_name(struct dentry *parent, char *name,
  386. struct dentry *child)
  387. {
  388. struct inode *inode = d_inode(child);
  389. struct inode *dir = d_inode(parent);
  390. struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
  391. struct ceph_mds_request *req = NULL;
  392. char *last_name = NULL;
  393. unsigned next_offset = 2;
  394. int err = -EINVAL;
  395. if (ceph_ino(inode) != ceph_ino(dir))
  396. goto out;
  397. if (ceph_snap(inode) == CEPH_SNAPDIR) {
  398. if (ceph_snap(dir) == CEPH_NOSNAP) {
  399. strcpy(name, fsc->mount_options->snapdir_name);
  400. err = 0;
  401. }
  402. goto out;
  403. }
  404. if (ceph_snap(dir) != CEPH_SNAPDIR)
  405. goto out;
  406. while (1) {
  407. struct ceph_mds_reply_info_parsed *rinfo;
  408. struct ceph_mds_reply_dir_entry *rde;
  409. int i;
  410. req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
  411. USE_AUTH_MDS);
  412. if (IS_ERR(req)) {
  413. err = PTR_ERR(req);
  414. req = NULL;
  415. goto out;
  416. }
  417. err = ceph_alloc_readdir_reply_buffer(req, inode);
  418. if (err)
  419. goto out;
  420. req->r_direct_mode = USE_AUTH_MDS;
  421. req->r_readdir_offset = next_offset;
  422. req->r_args.readdir.flags =
  423. cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
  424. if (last_name) {
  425. req->r_path2 = last_name;
  426. last_name = NULL;
  427. }
  428. req->r_inode = dir;
  429. ihold(dir);
  430. req->r_dentry = dget(parent);
  431. inode_lock(dir);
  432. err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
  433. inode_unlock(dir);
  434. if (err < 0)
  435. goto out;
  436. rinfo = &req->r_reply_info;
  437. for (i = 0; i < rinfo->dir_nr; i++) {
  438. rde = rinfo->dir_entries + i;
  439. BUG_ON(!rde->inode.in);
  440. if (ceph_snap(inode) ==
  441. le64_to_cpu(rde->inode.in->snapid)) {
  442. memcpy(name, rde->name, rde->name_len);
  443. name[rde->name_len] = '\0';
  444. err = 0;
  445. goto out;
  446. }
  447. }
  448. if (rinfo->dir_end)
  449. break;
  450. BUG_ON(rinfo->dir_nr <= 0);
  451. rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
  452. next_offset += rinfo->dir_nr;
  453. last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
  454. if (!last_name) {
  455. err = -ENOMEM;
  456. goto out;
  457. }
  458. ceph_mdsc_put_request(req);
  459. req = NULL;
  460. }
  461. err = -ENOENT;
  462. out:
  463. if (req)
  464. ceph_mdsc_put_request(req);
  465. kfree(last_name);
  466. dout("get_snap_name %p ino %llx.%llx err=%d\n",
  467. child, ceph_vinop(inode), err);
  468. return err;
  469. }
  470. static int ceph_get_name(struct dentry *parent, char *name,
  471. struct dentry *child)
  472. {
  473. struct ceph_mds_client *mdsc;
  474. struct ceph_mds_request *req;
  475. struct inode *inode = d_inode(child);
  476. int err;
  477. if (ceph_snap(inode) != CEPH_NOSNAP)
  478. return __get_snap_name(parent, name, child);
  479. mdsc = ceph_inode_to_client(inode)->mdsc;
  480. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
  481. USE_ANY_MDS);
  482. if (IS_ERR(req))
  483. return PTR_ERR(req);
  484. inode_lock(d_inode(parent));
  485. req->r_inode = inode;
  486. ihold(inode);
  487. req->r_ino2 = ceph_vino(d_inode(parent));
  488. req->r_parent = d_inode(parent);
  489. ihold(req->r_parent);
  490. set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
  491. req->r_num_caps = 2;
  492. err = ceph_mdsc_do_request(mdsc, NULL, req);
  493. inode_unlock(d_inode(parent));
  494. if (!err) {
  495. struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
  496. memcpy(name, rinfo->dname, rinfo->dname_len);
  497. name[rinfo->dname_len] = 0;
  498. dout("get_name %p ino %llx.%llx name %s\n",
  499. child, ceph_vinop(inode), name);
  500. } else {
  501. dout("get_name %p ino %llx.%llx err %d\n",
  502. child, ceph_vinop(inode), err);
  503. }
  504. ceph_mdsc_put_request(req);
  505. return err;
  506. }
  507. const struct export_operations ceph_export_ops = {
  508. .encode_fh = ceph_encode_fh,
  509. .fh_to_dentry = ceph_fh_to_dentry,
  510. .fh_to_parent = ceph_fh_to_parent,
  511. .get_parent = ceph_get_parent,
  512. .get_name = ceph_get_name,
  513. };