io-cmd-bdev.c

// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include "nvmet.h"

void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
        /* Logical blocks per physical block, 0's based. */
        const __le16 lpp0b = to0based(bdev_physical_block_size(bdev) /
                                      bdev_logical_block_size(bdev));

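        /*
         * A hypothetical example: on a 512e device with a 4096 B physical
         * and 512 B logical block size, lpp0b = to0based(4096 / 512) = 7,
         * i.e. eight logical blocks per physical block, 0's based.
         */
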
        /*
         * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
         * NAWUPF, and NACWU are defined for this namespace and should be
         * used by the host for this namespace instead of the AWUN, AWUPF,
         * and ACWU fields in the Identify Controller data structure. If
         * any of these fields are zero that means that the corresponding
         * field from the identify controller data structure should be used.
         */
        id->nsfeat |= 1 << 1;
        id->nawun = lpp0b;
        id->nawupf = lpp0b;
        id->nacwu = lpp0b;

        /*
         * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
         * NOWS are defined for this namespace and should be used by
         * the host for I/O optimization.
         */
        id->nsfeat |= 1 << 4;
        /* NPWG = Namespace Preferred Write Granularity. 0's based */
        id->npwg = lpp0b;
        /* NPWA = Namespace Preferred Write Alignment. 0's based */
        id->npwa = id->npwg;
        /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
        id->npdg = to0based(bdev_discard_granularity(bdev) /
                            bdev_logical_block_size(bdev));
        /* NPDA = Namespace Preferred Deallocate Alignment */
        id->npda = id->npdg;
        /* NOWS = Namespace Optimal Write Size */
        id->nows = to0based(bdev_io_opt(bdev) / bdev_logical_block_size(bdev));
}

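/* Drop the block device reference taken in nvmet_bdev_ns_enable(). */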
void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
        if (ns->bdev) {
                blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
                ns->bdev = NULL;
        }
}

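/*
 * Derive the namespace metadata size and protection information type from
 * the backing device's blk-integrity profile; only the T10-PI Type 1 and
 * Type 3 CRC profiles are exposed to the host.
 */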
static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
        struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

        if (bi) {
                ns->metadata_size = bi->tuple_size;
                if (bi->profile == &t10_pi_type1_crc)
                        ns->pi_type = NVME_NS_DPS_PI_TYPE1;
                else if (bi->profile == &t10_pi_type3_crc)
                        ns->pi_type = NVME_NS_DPS_PI_TYPE3;
                else
                        /* Unsupported metadata type */
                        ns->metadata_size = 0;
        }
}

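/*
 * Open the backing block device for the namespace and derive its size,
 * block size, protection information and zoned parameters from it.
 */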
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
        int ret;

        /*
         * When the buffered_io namespace attribute is enabled, the user wants
         * this block device to be used through the file backend, so that it
         * can take advantage of the page cache.
         */
        if (ns->buffered_io)
                return -ENOTBLK;

        ns->bdev = blkdev_get_by_path(ns->device_path,
                        FMODE_READ | FMODE_WRITE, NULL);
        if (IS_ERR(ns->bdev)) {
                ret = PTR_ERR(ns->bdev);
                if (ret != -ENOTBLK) {
                        pr_err("failed to open block device %s: (%ld)\n",
                               ns->device_path, PTR_ERR(ns->bdev));
                }
                ns->bdev = NULL;
                return ret;
        }
        ns->size = bdev_nr_bytes(ns->bdev);
        ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

        ns->pi_type = 0;
        ns->metadata_size = 0;
        if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
                nvmet_bdev_ns_enable_integrity(ns);

        if (bdev_is_zoned(ns->bdev)) {
                if (!nvmet_bdev_zns_enable(ns)) {
                        nvmet_bdev_ns_disable(ns);
                        return -EINVAL;
                }
                ns->csi = NVME_CSI_ZNS;
        }

        return 0;
}

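/* Refresh the namespace capacity from the current backing device size. */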
void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
        ns->size = bdev_nr_bytes(ns->bdev);
}

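/*
 * Translate a block layer completion status into an NVMe status code and
 * record the error location and LBA used for error log reporting.
 */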
u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
        u16 status = NVME_SC_SUCCESS;

        if (likely(blk_sts == BLK_STS_OK))
                return status;
        /*
         * Right now there exists an M : 1 mapping from block layer error
         * codes to NVMe status codes (see nvme_error_status()). For
         * consistency, when we reverse map we use the most appropriate NVMe
         * status code from the group of NVMe status codes used in
         * nvme_error_status().
         */
        switch (blk_sts) {
        case BLK_STS_NOSPC:
                status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, length);
                break;
        case BLK_STS_TARGET:
                status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, slba);
                break;
        case BLK_STS_NOTSUPP:
                req->error_loc = offsetof(struct nvme_common_command, opcode);
                switch (req->cmd->common.opcode) {
                case nvme_cmd_dsm:
                case nvme_cmd_write_zeroes:
                        status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
                        break;
                default:
                        status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                }
                break;
        case BLK_STS_MEDIUM:
                status = NVME_SC_ACCESS_DENIED;
                req->error_loc = offsetof(struct nvme_rw_command, nsid);
                break;
        case BLK_STS_IOERR:
        default:
                status = NVME_SC_INTERNAL | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_common_command, opcode);
        }

        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->error_slba = le64_to_cpu(req->cmd->rw.slba);
                break;
        case nvme_cmd_write_zeroes:
                req->error_slba =
                        le64_to_cpu(req->cmd->write_zeroes.slba);
                break;
        default:
                req->error_slba = 0;
        }
        return status;
}

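/* bio completion handler: finish the request with the translated status. */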
static void nvmet_bio_done(struct bio *bio)
{
        struct nvmet_req *req = bio->bi_private;

        nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
        nvmet_req_bio_put(req, bio);
}

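/*
 * Attach a bio integrity payload to @bio and populate it with the
 * protection information pages from the request's metadata scatterlist.
 */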
#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        struct blk_integrity *bi;
        struct bio_integrity_payload *bip;
        int rc;
        size_t resid, len;

        bi = bdev_get_integrity(req->ns->bdev);
        if (unlikely(!bi)) {
                pr_err("Unable to locate bio_integrity\n");
                return -ENODEV;
        }

        bip = bio_integrity_alloc(bio, GFP_NOIO,
                                  bio_max_segs(req->metadata_sg_cnt));
        if (IS_ERR(bip)) {
                pr_err("Unable to allocate bio_integrity_payload\n");
                return PTR_ERR(bip);
        }

        bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
        /* virtual start sector must be in integrity interval units */
        bip_set_seed(bip, bio->bi_iter.bi_sector >>
                     (bi->interval_exp - SECTOR_SHIFT));

        resid = bip->bip_iter.bi_size;
        while (resid > 0 && sg_miter_next(miter)) {
                len = min_t(size_t, miter->length, resid);
                rc = bio_integrity_add_page(bio, miter->page, len,
                                            offset_in_page(miter->addr));
                if (unlikely(rc != len)) {
                        pr_err("bio_integrity_add_page() failed; %d\n", rc);
                        sg_miter_stop(miter);
                        return -ENOMEM;
                }

                resid -= len;
                if (len < miter->length)
                        miter->consumed -= miter->length - len;
        }
        sg_miter_stop(miter);

        return 0;
}
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

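/*
 * Read/Write handler: map the request's data scatterlist onto one or more
 * chained bios and submit them under a plug; protection information is
 * attached to each bio when metadata is present.
 */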
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
        unsigned int sg_cnt = req->sg_cnt;
        struct bio *bio;
        struct scatterlist *sg;
        struct blk_plug plug;
        sector_t sector;
        blk_opf_t opf;
        int i, rc;
        struct sg_mapping_iter prot_miter;
        unsigned int iter_flags;
        unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

        if (!nvmet_check_transfer_len(req, total_len))
                return;

        if (!req->sg_cnt) {
                nvmet_req_complete(req, 0);
                return;
        }

        if (req->cmd->rw.opcode == nvme_cmd_write) {
                opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
                if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
                        opf |= REQ_FUA;
                iter_flags = SG_MITER_TO_SG;
        } else {
                opf = REQ_OP_READ;
                iter_flags = SG_MITER_FROM_SG;
        }

        if (is_pci_p2pdma_page(sg_page(req->sg)))
                opf |= REQ_NOMERGE;

        sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);

        if (nvmet_use_inline_bvec(req)) {
                bio = &req->b.inline_bio;
                bio_init(bio, req->ns->bdev, req->inline_bvec,
                         ARRAY_SIZE(req->inline_bvec), opf);
        } else {
                bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
                                GFP_KERNEL);
        }
        bio->bi_iter.bi_sector = sector;
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;

        blk_start_plug(&plug);
        if (req->metadata_len)
                sg_miter_start(&prot_miter, req->metadata_sg,
                               req->metadata_sg_cnt, iter_flags);

        for_each_sg(req->sg, sg, req->sg_cnt, i) {
                while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
                                != sg->length) {
                        struct bio *prev = bio;

                        if (req->metadata_len) {
                                rc = nvmet_bdev_alloc_bip(req, bio,
                                                          &prot_miter);
                                if (unlikely(rc)) {
                                        bio_io_error(bio);
                                        return;
                                }
                        }

                        bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
                                        opf, GFP_KERNEL);
                        bio->bi_iter.bi_sector = sector;

                        bio_chain(bio, prev);
                        submit_bio(prev);
                }

                sector += sg->length >> 9;
                sg_cnt--;
        }

        if (req->metadata_len) {
                rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
                if (unlikely(rc)) {
                        bio_io_error(bio);
                        return;
                }
        }

        submit_bio(bio);
        blk_finish_plug(&plug);
}

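/*
 * Flush command handler: submit a PREFLUSH bio if the backing device has a
 * volatile write cache, otherwise complete the command immediately.
 */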
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
        struct bio *bio = &req->b.inline_bio;

        if (!bdev_write_cache(req->ns->bdev)) {
                nvmet_req_complete(req, NVME_SC_SUCCESS);
                return;
        }

        if (!nvmet_check_transfer_len(req, 0))
                return;

        bio_init(bio, req->ns->bdev, req->inline_bvec,
                 ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;

        submit_bio(bio);
}

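/* Synchronous flush helper; returns an NVMe status code. */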
u16 nvmet_bdev_flush(struct nvmet_req *req)
{
        if (!bdev_write_cache(req->ns->bdev))
                return 0;

        if (blkdev_issue_flush(req->ns->bdev))
                return NVME_SC_INTERNAL | NVME_SC_DNR;
        return 0;
}

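/* Issue a discard for a single DSM range, accumulating into a chained bio. */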
static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
                struct nvme_dsm_range *range, struct bio **bio)
{
        struct nvmet_ns *ns = req->ns;
        int ret;

        ret = __blkdev_issue_discard(ns->bdev,
                        nvmet_lba_to_sect(ns, range->slba),
                        le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
                        GFP_KERNEL, bio);
        if (ret && ret != -EOPNOTSUPP) {
                req->error_slba = le64_to_cpu(range->slba);
                return errno_to_nvme_status(req, ret);
        }
        return NVME_SC_SUCCESS;
}

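/*
 * Walk the DSM range list from the command SGL and issue a discard for each
 * range; if a bio was built, completion happens in nvmet_bio_done().
 */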
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
        struct nvme_dsm_range range;
        struct bio *bio = NULL;
        int i;
        u16 status;

        for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
                status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
                                sizeof(range));
                if (status)
                        break;

                status = nvmet_bdev_discard_range(req, &range, &bio);
                if (status)
                        break;
        }

        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                if (status)
                        bio_io_error(bio);
                else
                        submit_bio(bio);
        } else {
                nvmet_req_complete(req, status);
        }
}

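/* DSM handler: only the Deallocate (AD) attribute is implemented. */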
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
        if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
                return;

        switch (le32_to_cpu(req->cmd->dsm.attributes)) {
        case NVME_DSMGMT_AD:
                nvmet_bdev_execute_discard(req);
                return;
        case NVME_DSMGMT_IDR:
        case NVME_DSMGMT_IDW:
        default:
                /* Not supported yet */
                nvmet_req_complete(req, 0);
                return;
        }
}

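/* Zero out the LBA range described by the Write Zeroes command. */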
static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
        struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
        struct bio *bio = NULL;
        sector_t sector;
        sector_t nr_sector;
        int ret;

        if (!nvmet_check_transfer_len(req, 0))
                return;

        sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
        nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
                        (req->ns->blksize_shift - 9));

        ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
                        GFP_KERNEL, &bio, 0);
        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                submit_bio(bio);
        } else {
                nvmet_req_complete(req, errno_to_nvme_status(req, ret));
        }
}

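/*
 * Command parser for the bdev backend: install the execute handler for the
 * supported opcodes and set the metadata length when PI is in use.
 */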
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->execute = nvmet_bdev_execute_rw;
                if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
                        req->metadata_len = nvmet_rw_metadata_len(req);
                return 0;
        case nvme_cmd_flush:
                req->execute = nvmet_bdev_execute_flush;
                return 0;
        case nvme_cmd_dsm:
                req->execute = nvmet_bdev_execute_dsm;
                return 0;
        case nvme_cmd_write_zeroes:
                req->execute = nvmet_bdev_execute_write_zeroes;
                return 0;
        default:
                return nvmet_report_invalid_opcode(req);
        }
}