Merge branch 'linus/master' into rdma.git for-next

rdma.git merge resolution for the 4.19 merge window

Conflicts:
 drivers/infiniband/core/rdma_core.c
   - Use the rdma code and revise with the new spelling for
     atomic_fetch_add_unless
 drivers/nvme/host/rdma.c
   - Replace max_sge with max_send_sge in new blk code
 drivers/nvme/target/rdma.c
   - Use the blk code and revise to use NULL for ib_post_recv when
     appropriate
   - Replace max_sge with max_recv_sge in new blk code
 net/rds/ib_send.c
   - Use the net code and revise to use NULL for ib_post_recv when
     appropriate

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
Jason Gunthorpe
2018-08-16 14:13:03 -06:00
7051 changed files with 339696 additions and 129229 deletions

View File

@@ -33,16 +33,17 @@
#include "nvmet.h"
/*
* We allow up to a page of inline data to go with the SQE
* We allow at least 1 page, up to 4 SGEs, and up to 16KB of inline data
*/
#define NVMET_RDMA_INLINE_DATA_SIZE PAGE_SIZE
#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE PAGE_SIZE
#define NVMET_RDMA_MAX_INLINE_SGE 4
#define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE)
struct nvmet_rdma_cmd {
struct ib_sge sge[2];
struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
struct ib_cqe cqe;
struct ib_recv_wr wr;
struct scatterlist inline_sg;
struct page *inline_page;
struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
struct nvme_command *nvme_cmd;
struct nvmet_rdma_queue *queue;
};
@@ -116,6 +117,8 @@ struct nvmet_rdma_device {
size_t srq_size;
struct kref ref;
struct list_head entry;
int inline_data_size;
int inline_page_count;
};
static bool nvmet_rdma_use_srq;
@@ -138,6 +141,11 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static const struct nvmet_fabrics_ops nvmet_rdma_ops;
static int num_pages(int len)
{
return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
}
/* XXX: really should move to a generic header sooner or later.. */
static inline u32 get_unaligned_le24(const u8 *p)
{
@@ -184,6 +192,71 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
}
static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *c)
{
struct scatterlist *sg;
struct ib_sge *sge;
int i;
if (!ndev->inline_data_size)
return;
sg = c->inline_sg;
sge = &c->sge[1];
for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
if (sge->length)
ib_dma_unmap_page(ndev->device, sge->addr,
sge->length, DMA_FROM_DEVICE);
if (sg_page(sg))
__free_page(sg_page(sg));
}
}
static int nvmet_rdma_alloc_inline_pages(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *c)
{
struct scatterlist *sg;
struct ib_sge *sge;
struct page *pg;
int len;
int i;
if (!ndev->inline_data_size)
return 0;
sg = c->inline_sg;
sg_init_table(sg, ndev->inline_page_count);
sge = &c->sge[1];
len = ndev->inline_data_size;
for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
pg = alloc_page(GFP_KERNEL);
if (!pg)
goto out_err;
sg_assign_page(sg, pg);
sge->addr = ib_dma_map_page(ndev->device,
pg, 0, PAGE_SIZE, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(ndev->device, sge->addr))
goto out_err;
sge->length = min_t(int, len, PAGE_SIZE);
sge->lkey = ndev->pd->local_dma_lkey;
len -= sge->length;
}
return 0;
out_err:
for (; i >= 0; i--, sg--, sge--) {
if (sge->length)
ib_dma_unmap_page(ndev->device, sge->addr,
sge->length, DMA_FROM_DEVICE);
if (sg_page(sg))
__free_page(sg_page(sg));
}
return -ENOMEM;
}
static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *c, bool admin)
{
@@ -200,33 +273,17 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
c->sge[0].length = sizeof(*c->nvme_cmd);
c->sge[0].lkey = ndev->pd->local_dma_lkey;
if (!admin) {
c->inline_page = alloc_pages(GFP_KERNEL,
get_order(NVMET_RDMA_INLINE_DATA_SIZE));
if (!c->inline_page)
goto out_unmap_cmd;
c->sge[1].addr = ib_dma_map_page(ndev->device,
c->inline_page, 0, NVMET_RDMA_INLINE_DATA_SIZE,
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(ndev->device, c->sge[1].addr))
goto out_free_inline_page;
c->sge[1].length = NVMET_RDMA_INLINE_DATA_SIZE;
c->sge[1].lkey = ndev->pd->local_dma_lkey;
}
if (!admin && nvmet_rdma_alloc_inline_pages(ndev, c))
goto out_unmap_cmd;
c->cqe.done = nvmet_rdma_recv_done;
c->wr.wr_cqe = &c->cqe;
c->wr.sg_list = c->sge;
c->wr.num_sge = admin ? 1 : 2;
c->wr.num_sge = admin ? 1 : ndev->inline_page_count + 1;
return 0;
out_free_inline_page:
if (!admin) {
__free_pages(c->inline_page,
get_order(NVMET_RDMA_INLINE_DATA_SIZE));
}
out_unmap_cmd:
ib_dma_unmap_single(ndev->device, c->sge[0].addr,
sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
@@ -240,12 +297,8 @@ out:
static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *c, bool admin)
{
if (!admin) {
ib_dma_unmap_page(ndev->device, c->sge[1].addr,
NVMET_RDMA_INLINE_DATA_SIZE, DMA_FROM_DEVICE);
__free_pages(c->inline_page,
get_order(NVMET_RDMA_INLINE_DATA_SIZE));
}
if (!admin)
nvmet_rdma_free_inline_pages(ndev, c);
ib_dma_unmap_single(ndev->device, c->sge[0].addr,
sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
kfree(c->nvme_cmd);
@@ -382,13 +435,21 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *cmd)
{
int ret;
ib_dma_sync_single_for_device(ndev->device,
cmd->sge[0].addr, cmd->sge[0].length,
DMA_FROM_DEVICE);
if (ndev->srq)
return ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL);
ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
else
ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL);
if (unlikely(ret))
pr_err("post_recv cmd failed\n");
return ret;
}
static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
@@ -427,7 +488,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
}
if (rsp->req.sg != &rsp->cmd->inline_sg)
if (rsp->req.sg != rsp->cmd->inline_sg)
sgl_free(rsp->req.sg);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
@@ -491,7 +552,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
rsp->send_sge.addr, rsp->send_sge.length,
DMA_TO_DEVICE);
if (ib_post_send(cm_id->qp, first_wr, NULL)) {
if (unlikely(ib_post_send(cm_id->qp, first_wr, NULL))) {
pr_err("sending cmd response failed\n");
nvmet_rdma_release_rsp(rsp);
}
@@ -527,10 +588,25 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
u64 off)
{
sg_init_table(&rsp->cmd->inline_sg, 1);
sg_set_page(&rsp->cmd->inline_sg, rsp->cmd->inline_page, len, off);
rsp->req.sg = &rsp->cmd->inline_sg;
rsp->req.sg_cnt = 1;
int sg_count = num_pages(len);
struct scatterlist *sg;
int i;
sg = rsp->cmd->inline_sg;
for (i = 0; i < sg_count; i++, sg++) {
if (i < sg_count - 1)
sg_unmark_end(sg);
else
sg_mark_end(sg);
sg->offset = off;
sg->length = min_t(int, len, PAGE_SIZE - off);
len -= sg->length;
if (!i)
off = 0;
}
rsp->req.sg = rsp->cmd->inline_sg;
rsp->req.sg_cnt = sg_count;
}
static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
@@ -542,7 +618,7 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
if (!nvme_is_write(rsp->req.cmd))
return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
if (off + len > NVMET_RDMA_INLINE_DATA_SIZE) {
if (off + len > rsp->queue->dev->inline_data_size) {
pr_err("invalid inline data offset!\n");
return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
}
@@ -741,7 +817,7 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
srq_size = 4095; /* XXX: tune */
srq_attr.attr.max_wr = srq_size;
srq_attr.attr.max_sge = 2;
srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
srq_attr.attr.srq_limit = 0;
srq_attr.srq_type = IB_SRQT_BASIC;
srq = ib_create_srq(ndev->pd, &srq_attr);
@@ -763,11 +839,16 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
ndev->srq = srq;
ndev->srq_size = srq_size;
for (i = 0; i < srq_size; i++)
nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
for (i = 0; i < srq_size; i++) {
ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
if (ret)
goto out_free_cmds;
}
return 0;
out_free_cmds:
nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
out_destroy_srq:
ib_destroy_srq(srq);
return ret;
@@ -791,7 +872,10 @@ static void nvmet_rdma_free_dev(struct kref *ref)
static struct nvmet_rdma_device *
nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
{
struct nvmet_port *port = cm_id->context;
struct nvmet_rdma_device *ndev;
int inline_page_count;
int inline_sge_count;
int ret;
mutex_lock(&device_list_mutex);
@@ -805,6 +889,18 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
if (!ndev)
goto out_err;
inline_page_count = num_pages(port->inline_data_size);
inline_sge_count = max(cm_id->device->attrs.max_sge_rd,
cm_id->device->attrs.max_recv_sge) - 1;
if (inline_page_count > inline_sge_count) {
pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n",
port->inline_data_size, cm_id->device->name,
inline_sge_count * PAGE_SIZE);
port->inline_data_size = inline_sge_count * PAGE_SIZE;
inline_page_count = inline_sge_count;
}
ndev->inline_data_size = port->inline_data_size;
ndev->inline_page_count = inline_page_count;
ndev->device = cm_id->device;
kref_init(&ndev->ref);
@@ -879,7 +975,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
} else {
/* +1 for drain */
qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
qp_attr.cap.max_recv_sge = 2;
qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
}
ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
@@ -897,13 +993,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
if (!ndev->srq) {
for (i = 0; i < queue->recv_queue_size; i++) {
queue->cmds[i].queue = queue;
nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
if (ret)
goto err_destroy_qp;
}
}
out:
return ret;
err_destroy_qp:
rdma_destroy_qp(queue->cm_id);
err_destroy_cq:
ib_free_cq(queue->cq);
goto out;
@@ -1377,6 +1477,15 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
return -EINVAL;
}
if (port->inline_data_size < 0) {
port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
} else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
pr_warn("inline_data_size %u is too large, reducing to %u\n",
port->inline_data_size,
NVMET_RDMA_MAX_INLINE_DATA_SIZE);
port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
}
ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
port->disc_addr.trsvcid, &addr);
if (ret) {
@@ -1454,7 +1563,6 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_RDMA,
.sqe_inline_size = NVMET_RDMA_INLINE_DATA_SIZE,
.msdbd = 1,
.has_keyed_sgls = 1,
.add_port = nvmet_rdma_add_port,