net/rds: Handle ODP mr registration/unregistration
On-Demand-Paging MRs are registered using ib_reg_user_mr and unregistered with ib_dereg_mr. Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com> Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
This commit is contained in:

committed by
Leon Romanovsky

parent
c4c86abb3f
commit
2eafa1746f
149
net/rds/rdma.c
149
net/rds/rdma.c
@@ -177,13 +177,14 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
|
||||
struct rds_conn_path *cp)
|
||||
{
|
||||
struct rds_mr *mr = NULL, *found;
|
||||
struct scatterlist *sg = NULL;
|
||||
unsigned int nr_pages;
|
||||
struct page **pages = NULL;
|
||||
struct scatterlist *sg;
|
||||
void *trans_private;
|
||||
unsigned long flags;
|
||||
rds_rdma_cookie_t cookie;
|
||||
unsigned int nents;
|
||||
unsigned int nents = 0;
|
||||
int need_odp = 0;
|
||||
long i;
|
||||
int ret;
|
||||
|
||||
@@ -197,6 +198,21 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* If the combination of the addr and size requested for this memory
|
||||
* region causes an integer overflow, return error.
|
||||
*/
|
||||
if (((args->vec.addr + args->vec.bytes) < args->vec.addr) ||
|
||||
PAGE_ALIGN(args->vec.addr + args->vec.bytes) <
|
||||
(args->vec.addr + args->vec.bytes)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!can_do_mlock()) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
nr_pages = rds_pages_in_vec(&args->vec);
|
||||
if (nr_pages == 0) {
|
||||
ret = -EINVAL;
|
||||
@@ -250,36 +266,44 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
|
||||
* the zero page.
|
||||
*/
|
||||
ret = rds_pin_pages(args->vec.addr, nr_pages, pages, 1);
|
||||
if (ret < 0)
|
||||
if (ret == -EOPNOTSUPP) {
|
||||
need_odp = 1;
|
||||
} else if (ret <= 0) {
|
||||
goto out;
|
||||
} else {
|
||||
nents = ret;
|
||||
sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
|
||||
if (!sg) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
WARN_ON(!nents);
|
||||
sg_init_table(sg, nents);
|
||||
|
||||
nents = ret;
|
||||
sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
|
||||
if (!sg) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
/* Stick all pages into the scatterlist */
|
||||
for (i = 0 ; i < nents; i++)
|
||||
sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0);
|
||||
|
||||
rdsdebug("RDS: trans_private nents is %u\n", nents);
|
||||
}
|
||||
WARN_ON(!nents);
|
||||
sg_init_table(sg, nents);
|
||||
|
||||
/* Stick all pages into the scatterlist */
|
||||
for (i = 0 ; i < nents; i++)
|
||||
sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0);
|
||||
|
||||
rdsdebug("RDS: trans_private nents is %u\n", nents);
|
||||
|
||||
/* Obtain a transport specific MR. If this succeeds, the
|
||||
* s/g list is now owned by the MR.
|
||||
* Note that dma_map() implies that pending writes are
|
||||
* flushed to RAM, so no dma_sync is needed here. */
|
||||
trans_private = rs->rs_transport->get_mr(sg, nents, rs,
|
||||
&mr->r_key,
|
||||
cp ? cp->cp_conn : NULL);
|
||||
trans_private = rs->rs_transport->get_mr(
|
||||
sg, nents, rs, &mr->r_key, cp ? cp->cp_conn : NULL,
|
||||
args->vec.addr, args->vec.bytes,
|
||||
need_odp ? ODP_ZEROBASED : ODP_NOT_NEEDED);
|
||||
|
||||
if (IS_ERR(trans_private)) {
|
||||
for (i = 0 ; i < nents; i++)
|
||||
put_page(sg_page(&sg[i]));
|
||||
kfree(sg);
|
||||
/* In ODP case, we don't GUP pages, so don't need
|
||||
* to release anything.
|
||||
*/
|
||||
if (!need_odp) {
|
||||
for (i = 0 ; i < nents; i++)
|
||||
put_page(sg_page(&sg[i]));
|
||||
kfree(sg);
|
||||
}
|
||||
ret = PTR_ERR(trans_private);
|
||||
goto out;
|
||||
}
|
||||
@@ -293,7 +317,11 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
|
||||
* map page aligned regions. So we keep the offset, and build
|
||||
* a 64bit cookie containing <R_Key, offset> and pass that
|
||||
* around. */
|
||||
cookie = rds_rdma_make_cookie(mr->r_key, args->vec.addr & ~PAGE_MASK);
|
||||
if (need_odp)
|
||||
cookie = rds_rdma_make_cookie(mr->r_key, 0);
|
||||
else
|
||||
cookie = rds_rdma_make_cookie(mr->r_key,
|
||||
args->vec.addr & ~PAGE_MASK);
|
||||
if (cookie_ret)
|
||||
*cookie_ret = cookie;
|
||||
|
||||
@@ -458,22 +486,26 @@ void rds_rdma_free_op(struct rm_rdma_op *ro)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < ro->op_nents; i++) {
|
||||
struct page *page = sg_page(&ro->op_sg[i]);
|
||||
if (ro->op_odp_mr) {
|
||||
rds_mr_put(ro->op_odp_mr);
|
||||
} else {
|
||||
for (i = 0; i < ro->op_nents; i++) {
|
||||
struct page *page = sg_page(&ro->op_sg[i]);
|
||||
|
||||
/* Mark page dirty if it was possibly modified, which
|
||||
* is the case for a RDMA_READ which copies from remote
|
||||
* to local memory */
|
||||
if (!ro->op_write) {
|
||||
WARN_ON(!page->mapping && irqs_disabled());
|
||||
set_page_dirty(page);
|
||||
/* Mark page dirty if it was possibly modified, which
|
||||
* is the case for a RDMA_READ which copies from remote
|
||||
* to local memory
|
||||
*/
|
||||
if (!ro->op_write)
|
||||
set_page_dirty(page);
|
||||
put_page(page);
|
||||
}
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
kfree(ro->op_notifier);
|
||||
ro->op_notifier = NULL;
|
||||
ro->op_active = 0;
|
||||
ro->op_odp_mr = NULL;
|
||||
}
|
||||
|
||||
void rds_atomic_free_op(struct rm_atomic_op *ao)
|
||||
@@ -583,6 +615,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
|
||||
struct rds_iovec *iovs;
|
||||
unsigned int i, j;
|
||||
int ret = 0;
|
||||
bool odp_supported = true;
|
||||
|
||||
if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
|
||||
|| rm->rdma.op_active)
|
||||
@@ -604,6 +637,9 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
|
||||
ret = -EINVAL;
|
||||
goto out_ret;
|
||||
}
|
||||
/* odp-mr is not supported for multiple requests within one message */
|
||||
if (args->nr_local != 1)
|
||||
odp_supported = false;
|
||||
|
||||
iovs = vec->iov;
|
||||
|
||||
@@ -625,6 +661,8 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
|
||||
op->op_silent = !!(args->flags & RDS_RDMA_SILENT);
|
||||
op->op_active = 1;
|
||||
op->op_recverr = rs->rs_recverr;
|
||||
op->op_odp_mr = NULL;
|
||||
|
||||
WARN_ON(!nr_pages);
|
||||
op->op_sg = rds_message_alloc_sgs(rm, nr_pages, &ret);
|
||||
if (!op->op_sg)
|
||||
@@ -674,10 +712,44 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
|
||||
* If it's a READ operation, we need to pin the pages for writing.
|
||||
*/
|
||||
ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
|
||||
if (ret < 0)
|
||||
if ((!odp_supported && ret <= 0) ||
|
||||
(odp_supported && ret <= 0 && ret != -EOPNOTSUPP))
|
||||
goto out_pages;
|
||||
else
|
||||
ret = 0;
|
||||
|
||||
if (ret == -EOPNOTSUPP) {
|
||||
struct rds_mr *local_odp_mr;
|
||||
|
||||
if (!rs->rs_transport->get_mr) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto out_pages;
|
||||
}
|
||||
local_odp_mr =
|
||||
kzalloc(sizeof(*local_odp_mr), GFP_KERNEL);
|
||||
if (!local_odp_mr) {
|
||||
ret = -ENOMEM;
|
||||
goto out_pages;
|
||||
}
|
||||
RB_CLEAR_NODE(&local_odp_mr->r_rb_node);
|
||||
refcount_set(&local_odp_mr->r_refcount, 1);
|
||||
local_odp_mr->r_trans = rs->rs_transport;
|
||||
local_odp_mr->r_sock = rs;
|
||||
local_odp_mr->r_trans_private =
|
||||
rs->rs_transport->get_mr(
|
||||
NULL, 0, rs, &local_odp_mr->r_key, NULL,
|
||||
iov->addr, iov->bytes, ODP_VIRTUAL);
|
||||
if (IS_ERR(local_odp_mr->r_trans_private)) {
|
||||
ret = IS_ERR(local_odp_mr->r_trans_private);
|
||||
rdsdebug("get_mr ret %d %p\"", ret,
|
||||
local_odp_mr->r_trans_private);
|
||||
kfree(local_odp_mr);
|
||||
ret = -EOPNOTSUPP;
|
||||
goto out_pages;
|
||||
}
|
||||
rdsdebug("Need odp; local_odp_mr %p trans_private %p\n",
|
||||
local_odp_mr, local_odp_mr->r_trans_private);
|
||||
op->op_odp_mr = local_odp_mr;
|
||||
op->op_odp_addr = iov->addr;
|
||||
}
|
||||
|
||||
rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
|
||||
nr_bytes, nr, iov->bytes, iov->addr);
|
||||
@@ -693,6 +765,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
|
||||
min_t(unsigned int, iov->bytes, PAGE_SIZE - offset),
|
||||
offset);
|
||||
|
||||
sg_dma_len(sg) = sg->length;
|
||||
rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n",
|
||||
sg->offset, sg->length, iov->addr, iov->bytes);
|
||||
|
||||
@@ -711,6 +784,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
|
||||
goto out_pages;
|
||||
}
|
||||
op->op_bytes = nr_bytes;
|
||||
ret = 0;
|
||||
|
||||
out_pages:
|
||||
kfree(pages);
|
||||
@@ -757,7 +831,8 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
|
||||
spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
|
||||
|
||||
if (mr) {
|
||||
mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
|
||||
mr->r_trans->sync_mr(mr->r_trans_private,
|
||||
DMA_TO_DEVICE);
|
||||
rm->rdma.op_rdma_mr = mr;
|
||||
}
|
||||
return err;
|
||||
|
Reference in New Issue
Block a user