/* fs/nfs/blocklayout/dev.c */
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (c) 2014-2016 Christoph Hellwig.
  4. */
  5. #include <linux/sunrpc/svc.h>
  6. #include <linux/blkdev.h>
  7. #include <linux/nfs4.h>
  8. #include <linux/nfs_fs.h>
  9. #include <linux/nfs_xdr.h>
  10. #include <linux/pr.h>
  11. #include "blocklayout.h"
  12. #define NFSDBG_FACILITY NFSDBG_PNFS_LD
  13. static void
  14. bl_free_device(struct pnfs_block_dev *dev)
  15. {
  16. if (dev->nr_children) {
  17. int i;
  18. for (i = 0; i < dev->nr_children; i++)
  19. bl_free_device(&dev->children[i]);
  20. kfree(dev->children);
  21. } else {
  22. if (dev->pr_registered) {
  23. const struct pr_ops *ops =
  24. dev->bdev->bd_disk->fops->pr_ops;
  25. int error;
  26. error = ops->pr_register(dev->bdev, dev->pr_key, 0,
  27. false);
  28. if (error)
  29. pr_err("failed to unregister PR key.\n");
  30. }
  31. if (dev->bdev)
  32. blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
  33. }
  34. }
  35. void
  36. bl_free_deviceid_node(struct nfs4_deviceid_node *d)
  37. {
  38. struct pnfs_block_dev *dev =
  39. container_of(d, struct pnfs_block_dev, node);
  40. bl_free_device(dev);
  41. kfree_rcu(dev, node.rcu);
  42. }
/*
 * Decode one XDR-encoded pnfs_block_volume from @xdr into @b.
 *
 * The volume type discriminator is read first; the rest of the encoding
 * depends on it.  Returns 0 on success or -EIO on a short buffer or an
 * out-of-range count/length.
 */
static int
nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
{
	__be32 *p;
	int i;

	p = xdr_inline_decode(xdr, 4);
	if (!p)
		return -EIO;
	b->type = be32_to_cpup(p++);

	switch (b->type) {
	case PNFS_BLOCK_VOLUME_SIMPLE:
		/* Simple volume: an array of on-disk signature components. */
		p = xdr_inline_decode(xdr, 4);
		if (!p)
			return -EIO;
		b->simple.nr_sigs = be32_to_cpup(p++);
		if (!b->simple.nr_sigs || b->simple.nr_sigs > PNFS_BLOCK_MAX_UUIDS) {
			dprintk("Bad signature count: %d\n", b->simple.nr_sigs);
			return -EIO;
		}

		/* Running total of the encoded size: type word + count word. */
		b->simple.len = 4 + 4;
		for (i = 0; i < b->simple.nr_sigs; i++) {
			p = xdr_inline_decode(xdr, 8 + 4);
			if (!p)
				return -EIO;
			p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
			b->simple.sigs[i].sig_len = be32_to_cpup(p++);
			if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
				pr_info("signature too long: %d\n",
					b->simple.sigs[i].sig_len);
				return -EIO;
			}

			/* sig_len was bounded above, so this copy fits. */
			p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
			if (!p)
				return -EIO;
			memcpy(&b->simple.sigs[i].sig, p,
				b->simple.sigs[i].sig_len);

			/* offset + length words + XDR-padded signature. */
			b->simple.len += 8 + 4 + \
				(XDR_QUADLEN(b->simple.sigs[i].sig_len) << 2);
		}
		break;
	case PNFS_BLOCK_VOLUME_SLICE:
		/* Slice: a (start, len) window onto another volume. */
		p = xdr_inline_decode(xdr, 8 + 8 + 4);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->slice.start);
		p = xdr_decode_hyper(p, &b->slice.len);
		b->slice.volume = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_CONCAT:
		/* Concat: an ordered list of volume indices. */
		p = xdr_inline_decode(xdr, 4);
		if (!p)
			return -EIO;
		b->concat.volumes_count = be32_to_cpup(p++);
		if (b->concat.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
			dprintk("Too many volumes: %d\n", b->concat.volumes_count);
			return -EIO;
		}

		p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
		if (!p)
			return -EIO;
		for (i = 0; i < b->concat.volumes_count; i++)
			b->concat.volumes[i] = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_STRIPE:
		/* Stripe: a chunk size plus the list of striped volumes. */
		p = xdr_inline_decode(xdr, 8 + 4);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->stripe.chunk_size);
		b->stripe.volumes_count = be32_to_cpup(p++);
		if (b->stripe.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
			dprintk("Too many volumes: %d\n", b->stripe.volumes_count);
			return -EIO;
		}

		p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
		if (!p)
			return -EIO;
		for (i = 0; i < b->stripe.volumes_count; i++)
			b->stripe.volumes[i] = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_SCSI:
		/* SCSI volume: designator triple plus reservation key. */
		p = xdr_inline_decode(xdr, 4 + 4 + 4);
		if (!p)
			return -EIO;
		b->scsi.code_set = be32_to_cpup(p++);
		b->scsi.designator_type = be32_to_cpup(p++);
		b->scsi.designator_len = be32_to_cpup(p++);
		p = xdr_inline_decode(xdr, b->scsi.designator_len);
		if (!p)
			return -EIO;
		/* Length is validated before the copy into the 256-byte buffer. */
		if (b->scsi.designator_len > 256)
			return -EIO;
		memcpy(&b->scsi.designator, p, b->scsi.designator_len);
		p = xdr_inline_decode(xdr, 8);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->scsi.pr_key);
		break;
	default:
		dprintk("unknown volume type!\n");
		return -EIO;
	}

	return 0;
}
  146. static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
  147. struct pnfs_block_dev_map *map)
  148. {
  149. map->start = dev->start;
  150. map->len = dev->len;
  151. map->disk_offset = dev->disk_offset;
  152. map->bdev = dev->bdev;
  153. return true;
  154. }
  155. static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
  156. struct pnfs_block_dev_map *map)
  157. {
  158. int i;
  159. for (i = 0; i < dev->nr_children; i++) {
  160. struct pnfs_block_dev *child = &dev->children[i];
  161. if (child->start > offset ||
  162. child->start + child->len <= offset)
  163. continue;
  164. child->map(child, offset - child->start, map);
  165. return true;
  166. }
  167. dprintk("%s: ran off loop!\n", __func__);
  168. return false;
  169. }
  170. static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
  171. struct pnfs_block_dev_map *map)
  172. {
  173. struct pnfs_block_dev *child;
  174. u64 chunk;
  175. u32 chunk_idx;
  176. u64 disk_offset;
  177. chunk = div_u64(offset, dev->chunk_size);
  178. div_u64_rem(chunk, dev->nr_children, &chunk_idx);
  179. if (chunk_idx >= dev->nr_children) {
  180. dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
  181. __func__, chunk_idx, offset, dev->chunk_size);
  182. /* error, should not happen */
  183. return false;
  184. }
  185. /* truncate offset to the beginning of the stripe */
  186. offset = chunk * dev->chunk_size;
  187. /* disk offset of the stripe */
  188. disk_offset = div_u64(offset, dev->nr_children);
  189. child = &dev->children[chunk_idx];
  190. child->map(child, disk_offset, map);
  191. map->start += offset;
  192. map->disk_offset += disk_offset;
  193. map->len = dev->chunk_size;
  194. return true;
  195. }
  196. static int
  197. bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
  198. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
  199. static int
  200. bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
  201. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
  202. {
  203. struct pnfs_block_volume *v = &volumes[idx];
  204. struct block_device *bdev;
  205. dev_t dev;
  206. dev = bl_resolve_deviceid(server, v, gfp_mask);
  207. if (!dev)
  208. return -EIO;
  209. bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
  210. if (IS_ERR(bdev)) {
  211. printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
  212. MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
  213. return PTR_ERR(bdev);
  214. }
  215. d->bdev = bdev;
  216. d->len = bdev_nr_bytes(d->bdev);
  217. d->map = bl_map_simple;
  218. printk(KERN_INFO "pNFS: using block device %s\n",
  219. d->bdev->bd_disk->disk_name);
  220. return 0;
  221. }
  222. static bool
  223. bl_validate_designator(struct pnfs_block_volume *v)
  224. {
  225. switch (v->scsi.designator_type) {
  226. case PS_DESIGNATOR_EUI64:
  227. if (v->scsi.code_set != PS_CODE_SET_BINARY)
  228. return false;
  229. if (v->scsi.designator_len != 8 &&
  230. v->scsi.designator_len != 10 &&
  231. v->scsi.designator_len != 16)
  232. return false;
  233. return true;
  234. case PS_DESIGNATOR_NAA:
  235. if (v->scsi.code_set != PS_CODE_SET_BINARY)
  236. return false;
  237. if (v->scsi.designator_len != 8 &&
  238. v->scsi.designator_len != 16)
  239. return false;
  240. return true;
  241. case PS_DESIGNATOR_T10:
  242. case PS_DESIGNATOR_NAME:
  243. pr_err("pNFS: unsupported designator "
  244. "(code set %d, type %d, len %d.\n",
  245. v->scsi.code_set,
  246. v->scsi.designator_type,
  247. v->scsi.designator_len);
  248. return false;
  249. default:
  250. pr_err("pNFS: invalid designator "
  251. "(code set %d, type %d, len %d.\n",
  252. v->scsi.code_set,
  253. v->scsi.designator_type,
  254. v->scsi.designator_len);
  255. return false;
  256. }
  257. }
  258. static struct block_device *
  259. bl_open_path(struct pnfs_block_volume *v, const char *prefix)
  260. {
  261. struct block_device *bdev;
  262. const char *devname;
  263. devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN",
  264. prefix, v->scsi.designator_len, v->scsi.designator);
  265. if (!devname)
  266. return ERR_PTR(-ENOMEM);
  267. bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
  268. if (IS_ERR(bdev)) {
  269. pr_warn("pNFS: failed to open device %s (%ld)\n",
  270. devname, PTR_ERR(bdev));
  271. }
  272. kfree(devname);
  273. return bdev;
  274. }
/*
 * Parse a SCSI volume: validate its designator, open the device via its
 * by-id udev path, and register our persistent reservation key.
 *
 * On success d->pr_registered is set so bl_free_device() knows to
 * unregister the key; on failure after the open, the device reference
 * is dropped via the goto-unwind path.  Returns 0 or a negative errno.
 */
static int
bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	struct block_device *bdev;
	const struct pr_ops *ops;
	int error;

	if (!bl_validate_designator(v))
		return -EINVAL;

	/*
	 * Try to open the RH/Fedora specific dm-mpath udev path first, as the
	 * wwn- links will only point to the first discovered SCSI device there.
	 * On other distributions like Debian, the default SCSI by-id path will
	 * point to the dm-multipath device if one exists.
	 */
	bdev = bl_open_path(v, "dm-uuid-mpath-0x");
	if (IS_ERR(bdev))
		bdev = bl_open_path(v, "wwn-0x");
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	d->bdev = bdev;

	d->len = bdev_nr_bytes(d->bdev);
	d->map = bl_map_simple;
	d->pr_key = v->scsi.pr_key;

	pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
		d->bdev->bd_disk->disk_name, d->pr_key);

	/* Persistent reservations are mandatory for the SCSI layout. */
	ops = d->bdev->bd_disk->fops->pr_ops;
	if (!ops) {
		pr_err("pNFS: block device %s does not support reservations.",
				d->bdev->bd_disk->disk_name);
		error = -EINVAL;
		goto out_blkdev_put;
	}

	/* Register our key (old key 0 == fresh registration). */
	error = ops->pr_register(d->bdev, 0, d->pr_key, true);
	if (error) {
		pr_err("pNFS: failed to register key for block device %s.",
				d->bdev->bd_disk->disk_name);
		goto out_blkdev_put;
	}

	d->pr_registered = true;
	return 0;

out_blkdev_put:
	blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE);
	return error;
}
  321. static int
  322. bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
  323. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
  324. {
  325. struct pnfs_block_volume *v = &volumes[idx];
  326. int ret;
  327. ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
  328. if (ret)
  329. return ret;
  330. d->disk_offset = v->slice.start;
  331. d->len = v->slice.len;
  332. return 0;
  333. }
  334. static int
  335. bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
  336. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
  337. {
  338. struct pnfs_block_volume *v = &volumes[idx];
  339. u64 len = 0;
  340. int ret, i;
  341. d->children = kcalloc(v->concat.volumes_count,
  342. sizeof(struct pnfs_block_dev), gfp_mask);
  343. if (!d->children)
  344. return -ENOMEM;
  345. for (i = 0; i < v->concat.volumes_count; i++) {
  346. ret = bl_parse_deviceid(server, &d->children[i],
  347. volumes, v->concat.volumes[i], gfp_mask);
  348. if (ret)
  349. return ret;
  350. d->nr_children++;
  351. d->children[i].start += len;
  352. len += d->children[i].len;
  353. }
  354. d->len = len;
  355. d->map = bl_map_concat;
  356. return 0;
  357. }
  358. static int
  359. bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
  360. struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
  361. {
  362. struct pnfs_block_volume *v = &volumes[idx];
  363. u64 len = 0;
  364. int ret, i;
  365. d->children = kcalloc(v->stripe.volumes_count,
  366. sizeof(struct pnfs_block_dev), gfp_mask);
  367. if (!d->children)
  368. return -ENOMEM;
  369. for (i = 0; i < v->stripe.volumes_count; i++) {
  370. ret = bl_parse_deviceid(server, &d->children[i],
  371. volumes, v->stripe.volumes[i], gfp_mask);
  372. if (ret)
  373. return ret;
  374. d->nr_children++;
  375. len += d->children[i].len;
  376. }
  377. d->len = len;
  378. d->chunk_size = v->stripe.chunk_size;
  379. d->map = bl_map_stripe;
  380. return 0;
  381. }
/*
 * Recursively build the in-memory device tree for volumes[idx] by
 * dispatching to the parser for its volume type.  Returns 0 on success
 * or a negative errno; unknown types yield -EIO.
 */
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	switch (volumes[idx].type) {
	case PNFS_BLOCK_VOLUME_SIMPLE:
		return bl_parse_simple(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_SLICE:
		return bl_parse_slice(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_CONCAT:
		return bl_parse_concat(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_STRIPE:
		return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_SCSI:
		return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
	default:
		dprintk("unsupported volume type: %d\n", volumes[idx].type);
		return -EIO;
	}
}
  402. struct nfs4_deviceid_node *
  403. bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
  404. gfp_t gfp_mask)
  405. {
  406. struct nfs4_deviceid_node *node = NULL;
  407. struct pnfs_block_volume *volumes;
  408. struct pnfs_block_dev *top;
  409. struct xdr_stream xdr;
  410. struct xdr_buf buf;
  411. struct page *scratch;
  412. int nr_volumes, ret, i;
  413. __be32 *p;
  414. scratch = alloc_page(gfp_mask);
  415. if (!scratch)
  416. goto out;
  417. xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
  418. xdr_set_scratch_page(&xdr, scratch);
  419. p = xdr_inline_decode(&xdr, sizeof(__be32));
  420. if (!p)
  421. goto out_free_scratch;
  422. nr_volumes = be32_to_cpup(p++);
  423. volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
  424. gfp_mask);
  425. if (!volumes)
  426. goto out_free_scratch;
  427. for (i = 0; i < nr_volumes; i++) {
  428. ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
  429. if (ret < 0)
  430. goto out_free_volumes;
  431. }
  432. top = kzalloc(sizeof(*top), gfp_mask);
  433. if (!top)
  434. goto out_free_volumes;
  435. ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
  436. node = &top->node;
  437. nfs4_init_deviceid_node(node, server, &pdev->dev_id);
  438. if (ret)
  439. nfs4_mark_deviceid_unavailable(node);
  440. out_free_volumes:
  441. kfree(volumes);
  442. out_free_scratch:
  443. __free_page(scratch);
  444. out:
  445. return node;
  446. }