virtiofs: schedule blocking async replies in separate worker
In virtiofs (unlike in regular fuse) processing of async replies is
serialized. This can result in a deadlock in rare corner cases when
there's a circular dependency between the completion of two or more async
replies.
Such a deadlock can be reproduced with xfstests:generic/503 if TEST_DIR ==
SCRATCH_MNT (which is a misconfiguration):
- Process A is waiting for page lock in worker thread context and blocked
(virtio_fs_requests_done_work()).
- Process B is holding page lock and waiting for pending writes to
finish (fuse_wait_on_page_writeback()).
- Write requests are waiting in virtqueue and can't complete because
worker thread is blocked on page lock (process A).
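
The cycle is easier to see in a minimal user-space analogue (not part of the patch; hypothetical names, with pthreads standing in for the kworker and a mutex standing in for the page lock):

/*
 * Hypothetical user-space analogue of the deadlock (not from the patch):
 * pthreads stand in for the kworker, a mutex for the page lock.
 * Build with: cc -pthread deadlock.c. It hangs forever, just like
 * virtio_fs_requests_done_work() in the report above.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile int write_done;	/* "write reply", queued behind the read reply */

/* Read completion: like an O_DIRECT read reply, it needs page_lock. */
static void complete_read(void)
{
	pthread_mutex_lock(&page_lock);		/* blocks: process B holds it */
	pthread_mutex_unlock(&page_lock);
}

/* The single completion thread: delivers replies strictly in order. */
static void *done_work(void *arg)
{
	complete_read();	/* wedges here ... */
	write_done = 1;		/* ... so the write reply is never delivered */
	return NULL;
}

/* Process B: holds page_lock while waiting for the write to finish. */
static void *process_b(void *arg)
{
	pthread_mutex_lock(&page_lock);
	while (!write_done)	/* fuse_wait_on_page_writeback() analogue */
		sleep(1);
	pthread_mutex_unlock(&page_lock);
	return NULL;
}

int main(void)
{
	pthread_t b, w;

	pthread_create(&b, NULL, process_b, NULL);
	sleep(1);		/* let B take page_lock first */
	pthread_create(&w, NULL, done_work, NULL);
	pthread_join(w, NULL);	/* never returns: circular wait */
	fprintf(stderr, "unreachable\n");
	return 0;
}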
Fix this by creating a unique work_struct for each async reply that can
block (O_DIRECT read).
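
In the analogue above, the fix corresponds to handing a blocking completion its own worker so the completion loop keeps delivering replies (hypothetical names; the real patch below allocates one struct virtio_fs_req_work per reply and runs it via schedule_work()):

/* Fixed completion loop for the sketch above: a reply that may block gets
 * its own thread, mirroring the per-reply work_struct in the patch below. */
static void *blocking_reply_worker(void *arg)
{
	complete_read();	/* may block, but no longer wedges the loop */
	return NULL;
}

static void *done_work_fixed(void *arg)
{
	pthread_t t;

	/* the args->may_block case: punt to a dedicated worker */
	pthread_create(&t, NULL, blocking_reply_worker, NULL);
	pthread_detach(t);

	write_done = 1;		/* write reply delivered; B drops page_lock */
	return NULL;
}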
Fixes: a62a8ef9d9 ("virtio-fs: add virtiofs filesystem")
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
commit bb737bbe48
parent ae83d0b416
committed by Miklos Szeredi
fs/fuse/file.c:

@@ -712,6 +712,7 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc,
 	spin_unlock(&io->lock);
 
 	ia->ap.args.end = fuse_aio_complete_req;
+	ia->ap.args.may_block = io->should_dirty;
 	err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
 	if (err)
 		fuse_aio_complete_req(fc, &ia->ap.args, err);
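
Why io->should_dirty is the right predicate for may_block: when the reply completes a read into user pages, the completion path releases those pages with set_page_dirty_lock(), which takes the page lock and can therefore sleep. Approximately, from fs/fuse/file.c of this era (quoted for context, not part of the patch):

static void fuse_release_user_pages(struct fuse_args_pages *ap,
				    bool should_dirty)
{
	unsigned int i;

	for (i = 0; i < ap->num_pages; i++) {
		if (should_dirty)
			set_page_dirty_lock(ap->pages[i]);	/* takes page lock */
		put_page(ap->pages[i]);
	}
}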
fs/fuse/fuse_i.h:

@@ -249,6 +249,7 @@ struct fuse_args {
 	bool out_argvar:1;
 	bool page_zeroing:1;
 	bool page_replace:1;
+	bool may_block:1;
 	struct fuse_in_arg in_args[3];
 	struct fuse_arg out_args[2];
 	void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);
fs/fuse/virtio_fs.c:

@@ -60,6 +60,12 @@ struct virtio_fs_forget {
 	struct virtio_fs_forget_req req;
 };
 
+struct virtio_fs_req_work {
+	struct fuse_req *req;
+	struct virtio_fs_vq *fsvq;
+	struct work_struct done_work;
+};
+
 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
 				 struct fuse_req *req, bool in_flight);
 
@@ -485,36 +491,16 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
 }
 
 /* Work function for request completion */
-static void virtio_fs_requests_done_work(struct work_struct *work)
+static void virtio_fs_request_complete(struct fuse_req *req,
+				       struct virtio_fs_vq *fsvq)
 {
-	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
-						 done_work);
 	struct fuse_pqueue *fpq = &fsvq->fud->pq;
 	struct fuse_conn *fc = fsvq->fud->fc;
-	struct virtqueue *vq = fsvq->vq;
-	struct fuse_req *req;
-	struct fuse_args_pages *ap;
-	struct fuse_req *next;
 	struct fuse_args *args;
+	struct fuse_args_pages *ap;
 	unsigned int len, i, thislen;
 	struct page *page;
-	LIST_HEAD(reqs);
 
-	/* Collect completed requests off the virtqueue */
-	spin_lock(&fsvq->lock);
-	do {
-		virtqueue_disable_cb(vq);
-
-		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
-			spin_lock(&fpq->lock);
-			list_move_tail(&req->list, &reqs);
-			spin_unlock(&fpq->lock);
-		}
-	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
-	spin_unlock(&fsvq->lock);
-
-	/* End requests */
-	list_for_each_entry_safe(req, next, &reqs, list) {
-		/*
-		 * TODO verify that server properly follows FUSE protocol
-		 * (oh.uniq, oh.len)
+	/*
+	 * TODO verify that server properly follows FUSE protocol
+	 * (oh.uniq, oh.len)
@@ -540,13 +526,63 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
 
-		spin_lock(&fpq->lock);
-		clear_bit(FR_SENT, &req->flags);
-		list_del_init(&req->list);
-		spin_unlock(&fpq->lock);
-
-		fuse_request_end(fc, req);
-		spin_lock(&fsvq->lock);
-		dec_in_flight_req(fsvq);
-		spin_unlock(&fsvq->lock);
+	spin_lock(&fpq->lock);
+	clear_bit(FR_SENT, &req->flags);
+	spin_unlock(&fpq->lock);
+
+	fuse_request_end(fc, req);
+	spin_lock(&fsvq->lock);
+	dec_in_flight_req(fsvq);
+	spin_unlock(&fsvq->lock);
+}
+
+static void virtio_fs_complete_req_work(struct work_struct *work)
+{
+	struct virtio_fs_req_work *w =
+		container_of(work, typeof(*w), done_work);
+
+	virtio_fs_request_complete(w->req, w->fsvq);
+	kfree(w);
+}
+
+static void virtio_fs_requests_done_work(struct work_struct *work)
+{
+	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+						 done_work);
+	struct fuse_pqueue *fpq = &fsvq->fud->pq;
+	struct virtqueue *vq = fsvq->vq;
+	struct fuse_req *req;
+	struct fuse_req *next;
+	unsigned int len;
+	LIST_HEAD(reqs);
+
+	/* Collect completed requests off the virtqueue */
+	spin_lock(&fsvq->lock);
+	do {
+		virtqueue_disable_cb(vq);
+
+		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
+			spin_lock(&fpq->lock);
+			list_move_tail(&req->list, &reqs);
+			spin_unlock(&fpq->lock);
+		}
+	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
+	spin_unlock(&fsvq->lock);
+
+	/* End requests */
+	list_for_each_entry_safe(req, next, &reqs, list) {
+		list_del_init(&req->list);
+
+		/* blocking async request completes in a worker context */
+		if (req->args->may_block) {
+			struct virtio_fs_req_work *w;
+
+			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
+			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
+			w->fsvq = fsvq;
+			w->req = req;
+			schedule_work(&w->done_work);
+		} else {
+			virtio_fs_request_complete(req, fsvq);
+		}
 	}
 }
 
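A note on the allocation in the hunk above: the completion path has no way to fail a request that the device has already answered, hence __GFP_NOFAIL, and GFP_NOFS keeps the allocation from recursing into filesystem reclaim from this context. Because each blocking reply gets its own work_struct, one stalled completion no longer holds up the others.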