IB/qib: Avoid returning EBUSY from MR deregister
A timing issue can occur where qib_mr_dereg can return -EBUSY if the
MR use count is not zero.

This can occur if the MR is de-registered while RDMA read response
packets are being progressed from the SDMA ring. The suspicion is that
the peer sent an RDMA read request, which has already been copied
across to the peer. The peer sees the completion of its request and
then communicates to the responder that the MR is no longer needed.
The responder tries to de-register the MR, but some responses remaining
in the SDMA ring still hold the MR use count.

The code now uses a get/put paradigm to track MR use counts and
coordinates with the MR de-registration process using a completion when
the count reaches zero. A timeout on the completion wait is in place to
catch other EBUSY issues.

The reference count protocol is as follows:
- The return to the user counts as 1.
- A reference from the lk_table or the qib_ibdev counts as 1.
- Transient I/O operations increase/decrease the count as necessary.

A lot of code duplication has been folded into the new routines
init_qib_mregion() and deinit_qib_mregion(). Additionally, explicit
initialization of fields to zero is now handled by kzalloc(). Also, the
duplicated 'while.*num_sge' code that decrements reference counts has
been consolidated in qib_put_ss().

Reviewed-by: Ramkrishna Vepa <ramkrishna.vepa@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
committed by Roland Dreier
parent 354dff1bd8
commit 6a82649f21
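The hunks below call qib_get_mr() and qib_put_mr(), but their definitions live in qib_verbs.h rather than in this file, so they do not appear in this diff. As a minimal sketch of the get/put protocol the message describes, consistent with how the diff uses the refcount and comp fields set up in init_qib_mregion(), the helpers amount to something like the following (an illustration, not the committed code):

#include <linux/atomic.h>
#include <linux/completion.h>

/* Illustrative only -- the real helpers are defined in qib_verbs.h. */
static inline void qib_get_mr(struct qib_mregion *mr)
{
	atomic_inc(&mr->refcount);
}

static inline void qib_put_mr(struct qib_mregion *mr)
{
	/* The final put (count reaches zero) wakes the de-registering thread. */
	if (atomic_dec_and_test(&mr->refcount))
		complete(&mr->comp);
}

De-registration drops the reference the user has held since init_qib_mregion() set the count to 1, then waits on mr->comp for up to 5 * HZ; if the wait times out, it re-takes the reference with qib_get_mr() and returns -EBUSY instead of freeing memory that in-flight I/O may still be using.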
@@ -47,6 +47,43 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
 	return container_of(ibfmr, struct qib_fmr, ibfmr);
 }
 
+static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd,
+			    int count)
+{
+	int m, i = 0;
+	int rval = 0;
+
+	m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
+	for (; i < m; i++) {
+		mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL);
+		if (!mr->map[i])
+			goto bail;
+	}
+	mr->mapsz = m;
+	init_completion(&mr->comp);
+	/* count returning the ptr to user */
+	atomic_set(&mr->refcount, 1);
+	mr->pd = pd;
+	mr->max_segs = count;
+out:
+	return rval;
+bail:
+	while (i)
+		kfree(mr->map[--i]);
+	rval = -ENOMEM;
+	goto out;
+}
+
+static void deinit_qib_mregion(struct qib_mregion *mr)
+{
+	int i = mr->mapsz;
+
+	mr->mapsz = 0;
+	while (i)
+		kfree(mr->map[--i]);
+}
+
+
 /**
  * qib_get_dma_mr - get a DMA memory region
  * @pd: protection domain for this memory region
@@ -58,10 +95,9 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
  */
 struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
 {
-	struct qib_ibdev *dev = to_idev(pd->device);
-	struct qib_mr *mr;
+	struct qib_mr *mr = NULL;
 	struct ib_mr *ret;
-	unsigned long flags;
+	int rval;
 
 	if (to_ipd(pd)->user) {
 		ret = ERR_PTR(-EPERM);
@@ -74,61 +110,64 @@ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
 		goto bail;
 	}
 
+	rval = init_qib_mregion(&mr->mr, pd, 0);
+	if (rval) {
+		ret = ERR_PTR(rval);
+		goto bail;
+	}
+
+
+	rval = qib_alloc_lkey(&mr->mr, 1);
+	if (rval) {
+		ret = ERR_PTR(rval);
+		goto bail_mregion;
+	}
+
 	mr->mr.access_flags = acc;
-	atomic_set(&mr->mr.refcount, 0);
-
-	spin_lock_irqsave(&dev->lk_table.lock, flags);
-	if (!dev->dma_mr)
-		dev->dma_mr = &mr->mr;
-	spin_unlock_irqrestore(&dev->lk_table.lock, flags);
-
 	ret = &mr->ibmr;
-
-bail:
+done:
 	return ret;
+
+bail_mregion:
+	deinit_qib_mregion(&mr->mr);
+bail:
+	kfree(mr);
+	goto done;
 }
 
-static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table)
+static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
 {
 	struct qib_mr *mr;
-	int m, i = 0;
+	int rval = -ENOMEM;
+	int m;
 
 	/* Allocate struct plus pointers to first level page tables. */
 	m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
-	mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
+	mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
 	if (!mr)
-		goto done;
-
-	/* Allocate first level page tables. */
-	for (; i < m; i++) {
-		mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
-		if (!mr->mr.map[i])
-			goto bail;
-	}
-	mr->mr.mapsz = m;
-	mr->mr.page_shift = 0;
-	mr->mr.max_segs = count;
+		goto bail;
 
+	rval = init_qib_mregion(&mr->mr, pd, count);
+	if (rval)
+		goto bail;
 	/*
 	 * ib_reg_phys_mr() will initialize mr->ibmr except for
 	 * lkey and rkey.
 	 */
-	if (!qib_alloc_lkey(lk_table, &mr->mr))
-		goto bail;
+	rval = qib_alloc_lkey(&mr->mr, 0);
+	if (rval)
+		goto bail_mregion;
 	mr->ibmr.lkey = mr->mr.lkey;
 	mr->ibmr.rkey = mr->mr.lkey;
 
-	atomic_set(&mr->mr.refcount, 0);
 	goto done;
 
-bail:
-	while (i)
-		kfree(mr->mr.map[--i]);
-	kfree(mr);
-	mr = NULL;
-
 done:
 	return mr;
 
+bail_mregion:
+	deinit_qib_mregion(&mr->mr);
+bail:
+	kfree(mr);
+	mr = ERR_PTR(rval);
+	goto done;
 }
 
 /**
@@ -148,19 +187,15 @@ struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
 	int n, m, i;
 	struct ib_mr *ret;
 
-	mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
-	if (mr == NULL) {
-		ret = ERR_PTR(-ENOMEM);
+	mr = alloc_mr(num_phys_buf, pd);
+	if (IS_ERR(mr)) {
+		ret = (struct ib_mr *)mr;
 		goto bail;
 	}
 
-	mr->mr.pd = pd;
 	mr->mr.user_base = *iova_start;
 	mr->mr.iova = *iova_start;
-	mr->mr.length = 0;
-	mr->mr.offset = 0;
 	mr->mr.access_flags = acc;
-	mr->umem = NULL;
 
 	m = 0;
 	n = 0;
@@ -186,7 +221,6 @@ bail:
  * @pd: protection domain for this memory region
  * @start: starting userspace address
  * @length: length of region to register
- * @virt_addr: virtual address to use (from HCA's point of view)
  * @mr_access_flags: access flags for this memory region
  * @udata: unused by the QLogic_IB driver
  *
@@ -216,14 +250,13 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	list_for_each_entry(chunk, &umem->chunk_list, list)
 		n += chunk->nents;
 
-	mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
-	if (!mr) {
-		ret = ERR_PTR(-ENOMEM);
+	mr = alloc_mr(n, pd);
+	if (IS_ERR(mr)) {
+		ret = (struct ib_mr *)mr;
 		ib_umem_release(umem);
 		goto bail;
 	}
 
-	mr->mr.pd = pd;
 	mr->mr.user_base = start;
 	mr->mr.iova = virt_addr;
 	mr->mr.length = length;
@@ -271,21 +304,25 @@ bail:
 int qib_dereg_mr(struct ib_mr *ibmr)
 {
 	struct qib_mr *mr = to_imr(ibmr);
-	struct qib_ibdev *dev = to_idev(ibmr->device);
-	int ret;
-	int i;
+	int ret = 0;
+	unsigned long timeout;
 
-	ret = qib_free_lkey(dev, &mr->mr);
-	if (ret)
-		return ret;
+	qib_free_lkey(&mr->mr);
 
-	i = mr->mr.mapsz;
-	while (i)
-		kfree(mr->mr.map[--i]);
+	qib_put_mr(&mr->mr); /* will set completion if last */
+	timeout = wait_for_completion_timeout(&mr->mr.comp,
+		5 * HZ);
+	if (!timeout) {
+		qib_get_mr(&mr->mr);
+		ret = -EBUSY;
+		goto out;
+	}
+	deinit_qib_mregion(&mr->mr);
 	if (mr->umem)
 		ib_umem_release(mr->umem);
 	kfree(mr);
-	return 0;
+out:
+	return ret;
 }
 
 /*
@@ -298,17 +335,9 @@ struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
 {
 	struct qib_mr *mr;
 
-	mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table);
-	if (mr == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	mr->mr.pd = pd;
-	mr->mr.user_base = 0;
-	mr->mr.iova = 0;
-	mr->mr.length = 0;
-	mr->mr.offset = 0;
-	mr->mr.access_flags = 0;
-	mr->umem = NULL;
+	mr = alloc_mr(max_page_list_len, pd);
+	if (IS_ERR(mr))
+		return (struct ib_mr *)mr;
 
 	return &mr->ibmr;
 }
@@ -322,11 +351,11 @@ qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
 	if (size > PAGE_SIZE)
 		return ERR_PTR(-EINVAL);
 
-	pl = kmalloc(sizeof *pl, GFP_KERNEL);
+	pl = kzalloc(sizeof *pl, GFP_KERNEL);
 	if (!pl)
 		return ERR_PTR(-ENOMEM);
 
-	pl->page_list = kmalloc(size, GFP_KERNEL);
+	pl->page_list = kzalloc(size, GFP_KERNEL);
 	if (!pl->page_list)
 		goto err_free;
 
@@ -355,57 +384,47 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
 			     struct ib_fmr_attr *fmr_attr)
 {
 	struct qib_fmr *fmr;
-	int m, i = 0;
+	int m;
 	struct ib_fmr *ret;
+	int rval = -ENOMEM;
 
 	/* Allocate struct plus pointers to first level page tables. */
 	m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ;
-	fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
+	fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
 	if (!fmr)
 		goto bail;
 
-	/* Allocate first level page tables. */
-	for (; i < m; i++) {
-		fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
-					 GFP_KERNEL);
-		if (!fmr->mr.map[i])
-			goto bail;
-	}
-	fmr->mr.mapsz = m;
+	rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+	if (rval)
+		goto bail;
 
 	/*
 	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
 	 * rkey.
 	 */
-	if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
-		goto bail;
+	rval = qib_alloc_lkey(&fmr->mr, 0);
+	if (rval)
+		goto bail_mregion;
 	fmr->ibfmr.rkey = fmr->mr.lkey;
 	fmr->ibfmr.lkey = fmr->mr.lkey;
 	/*
 	 * Resources are allocated but no valid mapping (RKEY can't be
 	 * used).
 	 */
-	fmr->mr.pd = pd;
-	fmr->mr.user_base = 0;
-	fmr->mr.iova = 0;
-	fmr->mr.length = 0;
-	fmr->mr.offset = 0;
 	fmr->mr.access_flags = mr_access_flags;
 	fmr->mr.max_segs = fmr_attr->max_pages;
 	fmr->mr.page_shift = fmr_attr->page_shift;
 
-	atomic_set(&fmr->mr.refcount, 0);
 	ret = &fmr->ibfmr;
 	goto done;
 
-bail:
-	while (i)
-		kfree(fmr->mr.map[--i]);
-	kfree(fmr);
-	ret = ERR_PTR(-ENOMEM);
-
 done:
 	return ret;
 
+bail_mregion:
+	deinit_qib_mregion(&fmr->mr);
+bail:
+	kfree(fmr);
+	ret = ERR_PTR(rval);
+	goto done;
 }
 
 /**
@@ -428,7 +447,8 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
 	u32 ps;
 	int ret;
 
-	if (atomic_read(&fmr->mr.refcount))
+	i = atomic_read(&fmr->mr.refcount);
+	if (i > 2)
 		return -EBUSY;
 
 	if (list_len > fmr->mr.max_segs) {
@@ -490,16 +510,20 @@ int qib_unmap_fmr(struct list_head *fmr_list)
 int qib_dealloc_fmr(struct ib_fmr *ibfmr)
 {
 	struct qib_fmr *fmr = to_ifmr(ibfmr);
-	int ret;
-	int i;
+	int ret = 0;
+	unsigned long timeout;
 
-	ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr);
-	if (ret)
-		return ret;
-
-	i = fmr->mr.mapsz;
-	while (i)
-		kfree(fmr->mr.map[--i]);
+	qib_free_lkey(&fmr->mr);
+	qib_put_mr(&fmr->mr); /* will set completion if last */
+	timeout = wait_for_completion_timeout(&fmr->mr.comp,
+		5 * HZ);
+	if (!timeout) {
+		qib_get_mr(&fmr->mr);
+		ret = -EBUSY;
+		goto out;
+	}
+	deinit_qib_mregion(&fmr->mr);
 	kfree(fmr);
-	return 0;
+out:
+	return ret;
 }