fs/9p: Update zero-copy implementation in 9p

* remove lot of update to different data structure
* add a seperate callback for zero copy request.
* above makes non zero copy code path simpler
* remove conditionalizing TREAD/TREADDIR/TWRITE in the zero copy path
* Fix the dotu p9_check_errors with zero copy. Add sufficient doc around
* Add support for both in and output buffers in zero copy callback
* pin and unpin pages in the same context
* use helpers instead of defining page offset and rest of page ourself
* Fix mem leak in p9_check_errors
* Remove 'E' and 'F' in p9pdu_vwritef

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
This commit is contained in:
Aneesh Kumar K.V
2011-08-16 10:50:10 +05:30
committed by Eric Van Hensbergen
parent c3b92c8787
commit abfa034e4b
9 changed files with 522 additions and 364 deletions

View File

@@ -150,12 +150,10 @@ static void req_done(struct virtqueue *vq)
while (1) {
spin_lock_irqsave(&chan->lock, flags);
rc = virtqueue_get_buf(chan->vq, &len);
if (rc == NULL) {
spin_unlock_irqrestore(&chan->lock, flags);
break;
}
chan->ring_bufs_avail = 1;
spin_unlock_irqrestore(&chan->lock, flags);
/* Wakeup if anyone waiting for VirtIO ring space. */
@@ -163,17 +161,6 @@ static void req_done(struct virtqueue *vq)
P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
req = p9_tag_lookup(chan->client, rc->tag);
if (req->tc->private) {
struct trans_rpage_info *rp = req->tc->private;
int p = rp->rp_nr_pages;
/*Release pages */
p9_release_req_pages(rp);
atomic_sub(p, &vp_pinned);
wake_up(&vp_wq);
if (rp->rp_alloc)
kfree(rp);
req->tc->private = NULL;
}
req->status = REQ_STATUS_RCVD;
p9_client_cb(chan->client, req);
}
@@ -193,9 +180,8 @@ static void req_done(struct virtqueue *vq)
*
*/
static int
pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
int count)
static int pack_sg_list(struct scatterlist *sg, int start,
int limit, char *data, int count)
{
int s;
int index = start;
@@ -224,31 +210,36 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
* this takes a list of pages.
* @sg: scatter/gather list to pack into
* @start: which segment of the sg_list to start at
* @pdata_off: Offset into the first page
* @**pdata: a list of pages to add into sg.
* @nr_pages: number of pages to pack into the scatter/gather list
* @data: data to pack into scatter/gather list
* @count: amount of data to pack into the scatter/gather list
*/
static int
pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
struct page **pdata, int count)
pack_sg_list_p(struct scatterlist *sg, int start, int limit,
struct page **pdata, int nr_pages, char *data, int count)
{
int s;
int i = 0;
int i = 0, s;
int data_off;
int index = start;
if (pdata_off) {
s = min((int)(PAGE_SIZE - pdata_off), count);
sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
BUG_ON(nr_pages > (limit - start));
/*
* if the first page doesn't start at
* page boundary find the offset
*/
data_off = offset_in_page(data);
while (nr_pages) {
s = rest_of_page(data);
if (s > count)
s = count;
sg_set_page(&sg[index++], pdata[i++], s, data_off);
data_off = 0;
data += s;
count -= s;
nr_pages--;
}
while (count) {
BUG_ON(index > limit);
s = min((int)PAGE_SIZE, count);
sg_set_page(&sg[index++], pdata[i++], s, 0);
count -= s;
}
return index-start;
return index - start;
}
/**
@@ -261,114 +252,23 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
static int
p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
{
int in, out, inp, outp;
struct virtio_chan *chan = client->trans;
int err;
int in, out;
unsigned long flags;
size_t pdata_off = 0;
struct trans_rpage_info *rpinfo = NULL;
int err, pdata_len = 0;
struct virtio_chan *chan = client->trans;
P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
req->status = REQ_STATUS_SENT;
if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
int nr_pages = p9_nr_pages(req);
int rpinfo_size = sizeof(struct trans_rpage_info) +
sizeof(struct page *) * nr_pages;
if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
err = wait_event_interruptible(vp_wq,
atomic_read(&vp_pinned) < chan->p9_max_pages);
if (err == -ERESTARTSYS)
return err;
P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
}
if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
/* We can use sdata */
req->tc->private = req->tc->sdata + req->tc->size;
rpinfo = (struct trans_rpage_info *)req->tc->private;
rpinfo->rp_alloc = 0;
} else {
req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
if (!req->tc->private) {
P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
"private kmalloc returned NULL");
return -ENOMEM;
}
rpinfo = (struct trans_rpage_info *)req->tc->private;
rpinfo->rp_alloc = 1;
}
err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
req->tc->id == P9_TREAD ? 1 : 0);
if (err < 0) {
if (rpinfo->rp_alloc)
kfree(rpinfo);
return err;
} else {
atomic_add(rpinfo->rp_nr_pages, &vp_pinned);
}
}
req_retry_pinned:
req_retry:
spin_lock_irqsave(&chan->lock, flags);
/* Handle out VirtIO ring buffers */
out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
req->tc->size);
out = pack_sg_list(chan->sg, 0,
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
/* We have additional write payload buffer to take care */
if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
pdata_off, rpinfo->rp_data, pdata_len);
} else {
char *pbuf;
if (req->tc->pubuf)
pbuf = (__force char *) req->tc->pubuf;
else
pbuf = req->tc->pkbuf;
outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
req->tc->pbuf_size);
}
out += outp;
}
/* Handle in VirtIO ring buffers */
if (req->tc->pbuf_size &&
((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
/*
* Take care of additional Read payload.
* 11 is the read/write header = PDU Header(7) + IO Size (4).
* Arrange in such a way that server places header in the
* alloced memory and payload onto the user buffer.
*/
inp = pack_sg_list(chan->sg, out,
VIRTQUEUE_NUM, req->rc->sdata, 11);
/*
* Running executables in the filesystem may result in
* a read request with kernel buffer as opposed to user buffer.
*/
if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
pdata_off, rpinfo->rp_data, pdata_len);
} else {
char *pbuf;
if (req->tc->pubuf)
pbuf = (__force char *) req->tc->pubuf;
else
pbuf = req->tc->pkbuf;
in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
pbuf, req->tc->pbuf_size);
}
in += inp;
} else {
in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM,
req->rc->sdata, req->rc->capacity);
}
in = pack_sg_list(chan->sg, out,
VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
if (err < 0) {
@@ -381,18 +281,15 @@ req_retry_pinned:
return err;
P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
goto req_retry_pinned;
goto req_retry;
} else {
spin_unlock_irqrestore(&chan->lock, flags);
P9_DPRINTK(P9_DEBUG_TRANS,
"9p debug: "
"virtio rpc add_buf returned failure");
if (rpinfo && rpinfo->rp_alloc)
kfree(rpinfo);
return -EIO;
}
}
virtqueue_kick(chan->vq);
spin_unlock_irqrestore(&chan->lock, flags);
@@ -400,6 +297,169 @@ req_retry_pinned:
return 0;
}
static int p9_get_mapped_pages(struct virtio_chan *chan,
struct page **pages, char *data,
int nr_pages, int write, int kern_buf)
{
int err;
if (!kern_buf) {
/*
* We allow only p9_max_pages pinned. We wait for the
* Other zc request to finish here
*/
if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
err = wait_event_interruptible(vp_wq,
(atomic_read(&vp_pinned) < chan->p9_max_pages));
if (err == -ERESTARTSYS)
return err;
}
err = p9_payload_gup(data, &nr_pages, pages, write);
if (err < 0)
return err;
atomic_add(nr_pages, &vp_pinned);
} else {
/* kernel buffer, no need to pin pages */
int s, index = 0;
int count = nr_pages;
while (nr_pages) {
s = rest_of_page(data);
pages[index++] = virt_to_page(data);
data += s;
nr_pages--;
}
nr_pages = count;
}
return nr_pages;
}
/**
* p9_virtio_zc_request - issue a zero copy request
* @client: client instance issuing the request
* @req: request to be issued
* @uidata: user bffer that should be ued for zero copy read
* @uodata: user buffer that shoud be user for zero copy write
* @inlen: read buffer size
* @olen: write buffer size
* @hdrlen: reader header size, This is the size of response protocol data
*
*/
static int
p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
char *uidata, char *uodata, int inlen,
int outlen, int in_hdr_len, int kern_buf)
{
int in, out, err;
unsigned long flags;
int in_nr_pages = 0, out_nr_pages = 0;
struct page **in_pages = NULL, **out_pages = NULL;
struct virtio_chan *chan = client->trans;
P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
if (uodata) {
out_nr_pages = p9_nr_pages(uodata, outlen);
out_pages = kmalloc(sizeof(struct page *) * out_nr_pages,
GFP_NOFS);
if (!out_pages) {
err = -ENOMEM;
goto err_out;
}
out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata,
out_nr_pages, 0, kern_buf);
if (out_nr_pages < 0) {
err = out_nr_pages;
kfree(out_pages);
out_pages = NULL;
goto err_out;
}
}
if (uidata) {
in_nr_pages = p9_nr_pages(uidata, inlen);
in_pages = kmalloc(sizeof(struct page *) * in_nr_pages,
GFP_NOFS);
if (!in_pages) {
err = -ENOMEM;
goto err_out;
}
in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata,
in_nr_pages, 1, kern_buf);
if (in_nr_pages < 0) {
err = in_nr_pages;
kfree(in_pages);
in_pages = NULL;
goto err_out;
}
}
req->status = REQ_STATUS_SENT;
req_retry_pinned:
spin_lock_irqsave(&chan->lock, flags);
/* out data */
out = pack_sg_list(chan->sg, 0,
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
if (out_pages)
out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
out_pages, out_nr_pages, uodata, outlen);
/*
* Take care of in data
* For example TREAD have 11.
* 11 is the read/write header = PDU Header(7) + IO Size (4).
* Arrange in such a way that server places header in the
* alloced memory and payload onto the user buffer.
*/
in = pack_sg_list(chan->sg, out,
VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
if (in_pages)
in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
in_pages, in_nr_pages, uidata, inlen);
err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
if (err < 0) {
if (err == -ENOSPC) {
chan->ring_bufs_avail = 0;
spin_unlock_irqrestore(&chan->lock, flags);
err = wait_event_interruptible(*chan->vc_wq,
chan->ring_bufs_avail);
if (err == -ERESTARTSYS)
goto err_out;
P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
goto req_retry_pinned;
} else {
spin_unlock_irqrestore(&chan->lock, flags);
P9_DPRINTK(P9_DEBUG_TRANS,
"9p debug: "
"virtio rpc add_buf returned failure");
err = -EIO;
goto err_out;
}
}
virtqueue_kick(chan->vq);
spin_unlock_irqrestore(&chan->lock, flags);
P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
err = wait_event_interruptible(*req->wq,
req->status >= REQ_STATUS_RCVD);
/*
* Non kernel buffers are pinned, unpin them
*/
err_out:
if (!kern_buf) {
if (in_pages) {
p9_release_pages(in_pages, in_nr_pages);
atomic_sub(in_nr_pages, &vp_pinned);
}
if (out_pages) {
p9_release_pages(out_pages, out_nr_pages);
atomic_sub(out_nr_pages, &vp_pinned);
}
/* wakeup anybody waiting for slots to pin pages */
wake_up(&vp_wq);
}
kfree(in_pages);
kfree(out_pages);
return err;
}
static ssize_t p9_mount_tag_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -591,8 +651,8 @@ static struct p9_trans_module p9_virtio_trans = {
.create = p9_virtio_create,
.close = p9_virtio_close,
.request = p9_virtio_request,
.zc_request = p9_virtio_zc_request,
.cancel = p9_virtio_cancel,
/*
* We leave one entry for input and one entry for response
* headers. We also skip one more entry to accomodate, address
@@ -600,7 +660,6 @@ static struct p9_trans_module p9_virtio_trans = {
* page in zero copy.
*/
.maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
.pref = P9_TRANS_PREF_PAYLOAD_SEP,
.def = 0,
.owner = THIS_MODULE,
};