IB/ipath: Performance improvements via mmap of queues

Improve performance of userspace post receive, post SRQ receive, and
poll CQ operations for ipath by allowing userspace to directly mmap()
receive queues and completion queues.  This eliminates the copying
between userspace and the kernel in the data path.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
Ralph Campbell
2006-09-22 15:22:26 -07:00
gecommit door Roland Dreier
bovenliggende 9bc57e2d19
commit 373d991580
9 gewijzigde bestanden met 788 toevoegingen en 385 verwijderingen

Bestand weergeven

@@ -48,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct ipath_srq *srq = to_isrq(ibsrq);
struct ipath_ibdev *dev = to_idev(ibsrq->device);
struct ipath_rwq *wq;
unsigned long flags;
int ret;
for (; wr; wr = wr->next) {
struct ipath_rwqe *wqe;
u32 next;
int i, j;
int i;
if (wr->num_sge > srq->rq.max_sge) {
if ((unsigned) wr->num_sge > srq->rq.max_sge) {
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
spin_lock_irqsave(&srq->rq.lock, flags);
next = srq->rq.head + 1;
wq = srq->rq.wq;
next = wq->head + 1;
if (next >= srq->rq.size)
next = 0;
if (next == srq->rq.tail) {
if (next == wq->tail) {
spin_unlock_irqrestore(&srq->rq.lock, flags);
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
wqe = get_rwqe_ptr(&srq->rq, wq->head);
wqe->wr_id = wr->wr_id;
wqe->sg_list[0].mr = NULL;
wqe->sg_list[0].vaddr = NULL;
wqe->sg_list[0].length = 0;
wqe->sg_list[0].sge_length = 0;
wqe->length = 0;
for (i = 0, j = 0; i < wr->num_sge; i++) {
/* Check LKEY */
if (to_ipd(srq->ibsrq.pd)->user &&
wr->sg_list[i].lkey == 0) {
spin_unlock_irqrestore(&srq->rq.lock,
flags);
*bad_wr = wr;
ret = -EINVAL;
goto bail;
}
if (wr->sg_list[i].length == 0)
continue;
if (!ipath_lkey_ok(&dev->lk_table,
&wqe->sg_list[j],
&wr->sg_list[i],
IB_ACCESS_LOCAL_WRITE)) {
spin_unlock_irqrestore(&srq->rq.lock,
flags);
*bad_wr = wr;
ret = -EINVAL;
goto bail;
}
wqe->length += wr->sg_list[i].length;
j++;
}
wqe->num_sge = j;
srq->rq.head = next;
wqe->num_sge = wr->num_sge;
for (i = 0; i < wr->num_sge; i++)
wqe->sg_list[i] = wr->sg_list[i];
wq->head = next;
spin_unlock_irqrestore(&srq->rq.lock, flags);
}
ret = 0;
@@ -133,53 +106,95 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
ret = ERR_PTR(-ENOMEM);
goto bail;
goto done;
}
if (srq_init_attr->attr.max_wr == 0) {
ret = ERR_PTR(-EINVAL);
goto bail;
goto done;
}
if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
(srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
ret = ERR_PTR(-EINVAL);
goto bail;
goto done;
}
srq = kmalloc(sizeof(*srq), GFP_KERNEL);
if (!srq) {
ret = ERR_PTR(-ENOMEM);
goto bail;
goto done;
}
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
srq->rq.size = srq_init_attr->attr.max_wr + 1;
sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge +
srq->rq.max_sge = srq_init_attr->attr.max_sge;
sz = sizeof(struct ib_sge) * srq->rq.max_sge +
sizeof(struct ipath_rwqe);
srq->rq.wq = vmalloc(srq->rq.size * sz);
srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
if (!srq->rq.wq) {
kfree(srq);
ret = ERR_PTR(-ENOMEM);
goto bail;
goto bail_srq;
}
/*
* Return the address of the RWQ as the offset to mmap.
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
struct ipath_mmap_info *ip;
__u64 offset = (__u64) srq->rq.wq;
int err;
err = ib_copy_to_udata(udata, &offset, sizeof(offset));
if (err) {
ret = ERR_PTR(err);
goto bail_wq;
}
/* Allocate info for ipath_mmap(). */
ip = kmalloc(sizeof(*ip), GFP_KERNEL);
if (!ip) {
ret = ERR_PTR(-ENOMEM);
goto bail_wq;
}
srq->ip = ip;
ip->context = ibpd->uobject->context;
ip->obj = srq->rq.wq;
kref_init(&ip->ref);
ip->mmap_cnt = 0;
ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
srq->rq.size * sz);
spin_lock_irq(&dev->pending_lock);
ip->next = dev->pending_mmaps;
dev->pending_mmaps = ip;
spin_unlock_irq(&dev->pending_lock);
} else
srq->ip = NULL;
/*
* ib_create_srq() will initialize srq->ibsrq.
*/
spin_lock_init(&srq->rq.lock);
srq->rq.head = 0;
srq->rq.tail = 0;
srq->rq.wq->head = 0;
srq->rq.wq->tail = 0;
srq->rq.max_sge = srq_init_attr->attr.max_sge;
srq->limit = srq_init_attr->attr.srq_limit;
ret = &srq->ibsrq;
dev->n_srqs_allocated++;
bail:
ret = &srq->ibsrq;
goto done;
bail_wq:
vfree(srq->rq.wq);
bail_srq:
kfree(srq);
done:
return ret;
}
@@ -195,78 +210,123 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
struct ib_udata *udata)
{
struct ipath_srq *srq = to_isrq(ibsrq);
unsigned long flags;
int ret;
if (attr_mask & IB_SRQ_MAX_WR)
if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
(attr->max_sge > srq->rq.max_sge)) {
ret = -EINVAL;
goto bail;
}
if (attr_mask & IB_SRQ_LIMIT)
if (attr->srq_limit >= srq->rq.size) {
ret = -EINVAL;
goto bail;
}
int ret = 0;
if (attr_mask & IB_SRQ_MAX_WR) {
struct ipath_rwqe *wq, *p;
u32 sz, size, n;
struct ipath_rwq *owq;
struct ipath_rwq *wq;
struct ipath_rwqe *p;
u32 sz, size, n, head, tail;
/* Check that the requested sizes are below the limits. */
if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
((attr_mask & IB_SRQ_LIMIT) ?
attr->srq_limit : srq->limit) > attr->max_wr) {
ret = -EINVAL;
goto bail;
}
sz = sizeof(struct ipath_rwqe) +
attr->max_sge * sizeof(struct ipath_sge);
srq->rq.max_sge * sizeof(struct ib_sge);
size = attr->max_wr + 1;
wq = vmalloc(size * sz);
wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
if (!wq) {
ret = -ENOMEM;
goto bail;
}
spin_lock_irqsave(&srq->rq.lock, flags);
if (srq->rq.head < srq->rq.tail)
n = srq->rq.size + srq->rq.head - srq->rq.tail;
/*
* Return the address of the RWQ as the offset to mmap.
* See ipath_mmap() for details.
*/
if (udata && udata->inlen >= sizeof(__u64)) {
__u64 offset_addr;
__u64 offset = (__u64) wq;
ret = ib_copy_from_udata(&offset_addr, udata,
sizeof(offset_addr));
if (ret) {
vfree(wq);
goto bail;
}
udata->outbuf = (void __user *) offset_addr;
ret = ib_copy_to_udata(udata, &offset,
sizeof(offset));
if (ret) {
vfree(wq);
goto bail;
}
}
spin_lock_irq(&srq->rq.lock);
/*
* validate head pointer value and compute
* the number of remaining WQEs.
*/
owq = srq->rq.wq;
head = owq->head;
if (head >= srq->rq.size)
head = 0;
tail = owq->tail;
if (tail >= srq->rq.size)
tail = 0;
n = head;
if (n < tail)
n += srq->rq.size - tail;
else
n = srq->rq.head - srq->rq.tail;
if (size <= n || size <= srq->limit) {
spin_unlock_irqrestore(&srq->rq.lock, flags);
n -= tail;
if (size <= n) {
spin_unlock_irq(&srq->rq.lock);
vfree(wq);
ret = -EINVAL;
goto bail;
}
n = 0;
p = wq;
while (srq->rq.tail != srq->rq.head) {
p = wq->wq;
while (tail != head) {
struct ipath_rwqe *wqe;
int i;
wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail);
wqe = get_rwqe_ptr(&srq->rq, tail);
p->wr_id = wqe->wr_id;
p->length = wqe->length;
p->num_sge = wqe->num_sge;
for (i = 0; i < wqe->num_sge; i++)
p->sg_list[i] = wqe->sg_list[i];
n++;
p = (struct ipath_rwqe *)((char *) p + sz);
if (++srq->rq.tail >= srq->rq.size)
srq->rq.tail = 0;
if (++tail >= srq->rq.size)
tail = 0;
}
vfree(srq->rq.wq);
srq->rq.wq = wq;
srq->rq.size = size;
srq->rq.head = n;
srq->rq.tail = 0;
srq->rq.max_sge = attr->max_sge;
spin_unlock_irqrestore(&srq->rq.lock, flags);
}
wq->head = n;
wq->tail = 0;
if (attr_mask & IB_SRQ_LIMIT)
srq->limit = attr->srq_limit;
spin_unlock_irq(&srq->rq.lock);
if (attr_mask & IB_SRQ_LIMIT) {
spin_lock_irqsave(&srq->rq.lock, flags);
srq->limit = attr->srq_limit;
spin_unlock_irqrestore(&srq->rq.lock, flags);
vfree(owq);
if (srq->ip) {
struct ipath_mmap_info *ip = srq->ip;
struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
ip->obj = wq;
ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
size * sz);
spin_lock_irq(&dev->pending_lock);
ip->next = dev->pending_mmaps;
dev->pending_mmaps = ip;
spin_unlock_irq(&dev->pending_lock);
}
} else if (attr_mask & IB_SRQ_LIMIT) {
spin_lock_irq(&srq->rq.lock);
if (attr->srq_limit >= srq->rq.size)
ret = -EINVAL;
else
srq->limit = attr->srq_limit;
spin_unlock_irq(&srq->rq.lock);
}
ret = 0;
bail:
return ret;