io-cmd-file.c 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * NVMe Over Fabrics Target File I/O commands implementation.
  4. * Copyright (c) 2017-2018 Western Digital Corporation or its
  5. * affiliates.
  6. */
  7. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  8. #include <linux/uio.h>
  9. #include <linux/falloc.h>
  10. #include <linux/file.h>
  11. #include <linux/fs.h>
  12. #include "nvmet.h"
  13. #define NVMET_MIN_MPOOL_OBJ 16
  14. void nvmet_file_ns_revalidate(struct nvmet_ns *ns)
  15. {
  16. ns->size = i_size_read(ns->file->f_mapping->host);
  17. }
  18. void nvmet_file_ns_disable(struct nvmet_ns *ns)
  19. {
  20. if (ns->file) {
  21. if (ns->buffered_io)
  22. flush_workqueue(buffered_io_wq);
  23. mempool_destroy(ns->bvec_pool);
  24. ns->bvec_pool = NULL;
  25. fput(ns->file);
  26. ns->file = NULL;
  27. }
  28. }
  29. int nvmet_file_ns_enable(struct nvmet_ns *ns)
  30. {
  31. int flags = O_RDWR | O_LARGEFILE;
  32. int ret = 0;
  33. if (!ns->buffered_io)
  34. flags |= O_DIRECT;
  35. ns->file = filp_open(ns->device_path, flags, 0);
  36. if (IS_ERR(ns->file)) {
  37. ret = PTR_ERR(ns->file);
  38. pr_err("failed to open file %s: (%d)\n",
  39. ns->device_path, ret);
  40. ns->file = NULL;
  41. return ret;
  42. }
  43. nvmet_file_ns_revalidate(ns);
  44. /*
  45. * i_blkbits can be greater than the universally accepted upper bound,
  46. * so make sure we export a sane namespace lba_shift.
  47. */
  48. ns->blksize_shift = min_t(u8,
  49. file_inode(ns->file)->i_blkbits, 12);
  50. ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
  51. mempool_free_slab, nvmet_bvec_cache);
  52. if (!ns->bvec_pool) {
  53. ret = -ENOMEM;
  54. goto err;
  55. }
  56. return ret;
  57. err:
  58. fput(ns->file);
  59. ns->file = NULL;
  60. ns->size = 0;
  61. ns->blksize_shift = 0;
  62. return ret;
  63. }
  64. static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
  65. unsigned long nr_segs, size_t count, int ki_flags)
  66. {
  67. struct kiocb *iocb = &req->f.iocb;
  68. ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
  69. struct iov_iter iter;
  70. int rw;
  71. if (req->cmd->rw.opcode == nvme_cmd_write) {
  72. if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
  73. ki_flags |= IOCB_DSYNC;
  74. call_iter = req->ns->file->f_op->write_iter;
  75. rw = ITER_SOURCE;
  76. } else {
  77. call_iter = req->ns->file->f_op->read_iter;
  78. rw = ITER_DEST;
  79. }
  80. iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count);
  81. iocb->ki_pos = pos;
  82. iocb->ki_filp = req->ns->file;
  83. iocb->ki_flags = ki_flags | iocb->ki_filp->f_iocb_flags;
  84. return call_iter(iocb, &iter);
  85. }
  86. static void nvmet_file_io_done(struct kiocb *iocb, long ret)
  87. {
  88. struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
  89. u16 status = NVME_SC_SUCCESS;
  90. if (req->f.bvec != req->inline_bvec) {
  91. if (likely(req->f.mpool_alloc == false))
  92. kfree(req->f.bvec);
  93. else
  94. mempool_free(req->f.bvec, req->ns->bvec_pool);
  95. }
  96. if (unlikely(ret != req->transfer_len))
  97. status = errno_to_nvme_status(req, ret);
  98. nvmet_req_complete(req, status);
  99. }
  100. static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
  101. {
  102. ssize_t nr_bvec = req->sg_cnt;
  103. unsigned long bv_cnt = 0;
  104. bool is_sync = false;
  105. size_t len = 0, total_len = 0;
  106. ssize_t ret = 0;
  107. loff_t pos;
  108. int i;
  109. struct scatterlist *sg;
  110. if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC)
  111. is_sync = true;
  112. pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
  113. if (unlikely(pos + req->transfer_len > req->ns->size)) {
  114. nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
  115. return true;
  116. }
  117. memset(&req->f.iocb, 0, sizeof(struct kiocb));
  118. for_each_sg(req->sg, sg, req->sg_cnt, i) {
  119. bvec_set_page(&req->f.bvec[bv_cnt], sg_page(sg), sg->length,
  120. sg->offset);
  121. len += req->f.bvec[bv_cnt].bv_len;
  122. total_len += req->f.bvec[bv_cnt].bv_len;
  123. bv_cnt++;
  124. WARN_ON_ONCE((nr_bvec - 1) < 0);
  125. if (unlikely(is_sync) &&
  126. (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
  127. ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len, 0);
  128. if (ret < 0)
  129. goto complete;
  130. pos += len;
  131. bv_cnt = 0;
  132. len = 0;
  133. }
  134. nr_bvec--;
  135. }
  136. if (WARN_ON_ONCE(total_len != req->transfer_len)) {
  137. ret = -EIO;
  138. goto complete;
  139. }
  140. if (unlikely(is_sync)) {
  141. ret = total_len;
  142. goto complete;
  143. }
  144. /*
  145. * A NULL ki_complete ask for synchronous execution, which we want
  146. * for the IOCB_NOWAIT case.
  147. */
  148. if (!(ki_flags & IOCB_NOWAIT))
  149. req->f.iocb.ki_complete = nvmet_file_io_done;
  150. ret = nvmet_file_submit_bvec(req, pos, bv_cnt, total_len, ki_flags);
  151. switch (ret) {
  152. case -EIOCBQUEUED:
  153. return true;
  154. case -EAGAIN:
  155. if (WARN_ON_ONCE(!(ki_flags & IOCB_NOWAIT)))
  156. goto complete;
  157. return false;
  158. case -EOPNOTSUPP:
  159. /*
  160. * For file systems returning error -EOPNOTSUPP, handle
  161. * IOCB_NOWAIT error case separately and retry without
  162. * IOCB_NOWAIT.
  163. */
  164. if ((ki_flags & IOCB_NOWAIT))
  165. return false;
  166. break;
  167. }
  168. complete:
  169. nvmet_file_io_done(&req->f.iocb, ret);
  170. return true;
  171. }
  172. static void nvmet_file_buffered_io_work(struct work_struct *w)
  173. {
  174. struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
  175. nvmet_file_execute_io(req, 0);
  176. }
  177. static void nvmet_file_submit_buffered_io(struct nvmet_req *req)
  178. {
  179. INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
  180. queue_work(buffered_io_wq, &req->f.work);
  181. }
  182. static void nvmet_file_execute_rw(struct nvmet_req *req)
  183. {
  184. ssize_t nr_bvec = req->sg_cnt;
  185. if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
  186. return;
  187. if (!req->sg_cnt || !nr_bvec) {
  188. nvmet_req_complete(req, 0);
  189. return;
  190. }
  191. if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
  192. req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
  193. GFP_KERNEL);
  194. else
  195. req->f.bvec = req->inline_bvec;
  196. if (unlikely(!req->f.bvec)) {
  197. /* fallback under memory pressure */
  198. req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
  199. req->f.mpool_alloc = true;
  200. } else
  201. req->f.mpool_alloc = false;
  202. if (req->ns->buffered_io) {
  203. if (likely(!req->f.mpool_alloc) &&
  204. (req->ns->file->f_mode & FMODE_NOWAIT) &&
  205. nvmet_file_execute_io(req, IOCB_NOWAIT))
  206. return;
  207. nvmet_file_submit_buffered_io(req);
  208. } else
  209. nvmet_file_execute_io(req, 0);
  210. }
  211. u16 nvmet_file_flush(struct nvmet_req *req)
  212. {
  213. return errno_to_nvme_status(req, vfs_fsync(req->ns->file, 1));
  214. }
  215. static void nvmet_file_flush_work(struct work_struct *w)
  216. {
  217. struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
  218. nvmet_req_complete(req, nvmet_file_flush(req));
  219. }
  220. static void nvmet_file_execute_flush(struct nvmet_req *req)
  221. {
  222. if (!nvmet_check_transfer_len(req, 0))
  223. return;
  224. INIT_WORK(&req->f.work, nvmet_file_flush_work);
  225. queue_work(nvmet_wq, &req->f.work);
  226. }
  227. static void nvmet_file_execute_discard(struct nvmet_req *req)
  228. {
  229. int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
  230. struct nvme_dsm_range range;
  231. loff_t offset, len;
  232. u16 status = 0;
  233. int ret;
  234. int i;
  235. for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
  236. status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
  237. sizeof(range));
  238. if (status)
  239. break;
  240. offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
  241. len = le32_to_cpu(range.nlb);
  242. len <<= req->ns->blksize_shift;
  243. if (offset + len > req->ns->size) {
  244. req->error_slba = le64_to_cpu(range.slba);
  245. status = errno_to_nvme_status(req, -ENOSPC);
  246. break;
  247. }
  248. ret = vfs_fallocate(req->ns->file, mode, offset, len);
  249. if (ret && ret != -EOPNOTSUPP) {
  250. req->error_slba = le64_to_cpu(range.slba);
  251. status = errno_to_nvme_status(req, ret);
  252. break;
  253. }
  254. }
  255. nvmet_req_complete(req, status);
  256. }
  257. static void nvmet_file_dsm_work(struct work_struct *w)
  258. {
  259. struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
  260. switch (le32_to_cpu(req->cmd->dsm.attributes)) {
  261. case NVME_DSMGMT_AD:
  262. nvmet_file_execute_discard(req);
  263. return;
  264. case NVME_DSMGMT_IDR:
  265. case NVME_DSMGMT_IDW:
  266. default:
  267. /* Not supported yet */
  268. nvmet_req_complete(req, 0);
  269. return;
  270. }
  271. }
  272. static void nvmet_file_execute_dsm(struct nvmet_req *req)
  273. {
  274. if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
  275. return;
  276. INIT_WORK(&req->f.work, nvmet_file_dsm_work);
  277. queue_work(nvmet_wq, &req->f.work);
  278. }
  279. static void nvmet_file_write_zeroes_work(struct work_struct *w)
  280. {
  281. struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
  282. struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
  283. int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
  284. loff_t offset;
  285. loff_t len;
  286. int ret;
  287. offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
  288. len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
  289. req->ns->blksize_shift);
  290. if (unlikely(offset + len > req->ns->size)) {
  291. nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
  292. return;
  293. }
  294. ret = vfs_fallocate(req->ns->file, mode, offset, len);
  295. nvmet_req_complete(req, ret < 0 ? errno_to_nvme_status(req, ret) : 0);
  296. }
  297. static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
  298. {
  299. if (!nvmet_check_transfer_len(req, 0))
  300. return;
  301. INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
  302. queue_work(nvmet_wq, &req->f.work);
  303. }
  304. u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
  305. {
  306. switch (req->cmd->common.opcode) {
  307. case nvme_cmd_read:
  308. case nvme_cmd_write:
  309. req->execute = nvmet_file_execute_rw;
  310. return 0;
  311. case nvme_cmd_flush:
  312. req->execute = nvmet_file_execute_flush;
  313. return 0;
  314. case nvme_cmd_dsm:
  315. req->execute = nvmet_file_execute_dsm;
  316. return 0;
  317. case nvme_cmd_write_zeroes:
  318. req->execute = nvmet_file_execute_write_zeroes;
  319. return 0;
  320. default:
  321. return nvmet_report_invalid_opcode(req);
  322. }
  323. }