Merge tag 'fuse-update-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Fix a rare deadlock in virtiofs

 - Fix st_blocks in writeback cache mode

 - Fix wrong checks in splice move causing spurious warnings

 - Fix a race between a GETATTR request and a FUSE_NOTIFY_INVAL_INODE
   notification

 - Use rb-tree instead of linear search for pages currently under
   writeout by userspace

 - Fix copy_file_range() inconsistencies

* tag 'fuse-update-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: copy_file_range should truncate cache
  fuse: fix copy_file_range cache issues
  fuse: optimize writepages search
  fuse: update attr_version counter on fuse_notify_inval_inode()
  fuse: don't check refcount after stealing page
  fuse: fix weird page warning
  fuse: use dump_page
  virtiofs: do not use fuse_fill_super_common() for device installation
  fuse: always allow query of st_dev
  fuse: always flush dirty data on close(2)
  fuse: invalidate inode attr in writeback cache mode
  fuse: Update stale comment in queue_interrupt()
  fuse: BUG_ON correction in fuse_dev_splice_write()
  virtiofs: Add mount option and atime behavior to the doc
  virtiofs: schedule blocking async replies in separate worker
This commit is contained in:
Linus Torvalds
2020-06-09 15:48:24 -07:00
7 changed files with 219 additions and 85 deletions

View File

@@ -342,7 +342,7 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
list_add_tail(&req->intr_entry, &fiq->interrupts);
/*
* Pairs with smp_mb() implied by test_and_set_bit()
* from request_end().
* from fuse_request_end().
*/
smp_mb();
if (test_bit(FR_FINISHED, &req->flags)) {
@@ -764,16 +764,15 @@ static int fuse_check_page(struct page *page)
{
if (page_mapcount(page) ||
page->mapping != NULL ||
page_count(page) != 1 ||
(page->flags & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
1 << PG_uptodate |
1 << PG_lru |
1 << PG_active |
1 << PG_reclaim))) {
pr_warn("trying to steal weird page\n");
pr_warn(" page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
1 << PG_reclaim |
1 << PG_waiters))) {
dump_page(page, "fuse: trying to steal weird page");
return 1;
}
return 0;
@@ -1977,8 +1976,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
struct pipe_buffer *ibuf;
struct pipe_buffer *obuf;
BUG_ON(nbuf >= pipe->ring_size);
BUG_ON(tail == head);
if (WARN_ON(nbuf >= count || tail == head))
goto out_free;
ibuf = &pipe->bufs[tail & mask];
obuf = &bufs[nbuf];

View File

@@ -1689,8 +1689,18 @@ static int fuse_getattr(const struct path *path, struct kstat *stat,
struct inode *inode = d_inode(path->dentry);
struct fuse_conn *fc = get_fuse_conn(inode);
if (!fuse_allow_current_process(fc))
if (!fuse_allow_current_process(fc)) {
if (!request_mask) {
/*
* If user explicitly requested *nothing* then don't
* error out, but return st_dev only.
*/
stat->result_mask = 0;
stat->dev = inode->i_sb->s_dev;
return 0;
}
return -EACCES;
}
return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
}

View File

@@ -357,7 +357,7 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
struct fuse_writepage_args {
struct fuse_io_args ia;
struct list_head writepages_entry;
struct rb_node writepages_entry;
struct list_head queue_entry;
struct fuse_writepage_args *next;
struct inode *inode;
@@ -366,17 +366,23 @@ struct fuse_writepage_args {
static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
pgoff_t idx_from, pgoff_t idx_to)
{
struct fuse_writepage_args *wpa;
struct rb_node *n;
list_for_each_entry(wpa, &fi->writepages, writepages_entry) {
n = fi->writepages.rb_node;
while (n) {
struct fuse_writepage_args *wpa;
pgoff_t curr_index;
wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
WARN_ON(get_fuse_inode(wpa->inode) != fi);
curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
if (idx_from < curr_index + wpa->ia.ap.num_pages &&
curr_index <= idx_to) {
if (idx_from >= curr_index + wpa->ia.ap.num_pages)
n = n->rb_right;
else if (idx_to < curr_index)
n = n->rb_left;
else
return wpa;
}
}
return NULL;
}
@@ -445,9 +451,6 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (is_bad_inode(inode))
return -EIO;
if (fc->no_flush)
return 0;
err = write_inode_now(inode, 1);
if (err)
return err;
@@ -460,6 +463,10 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (err)
return err;
err = 0;
if (fc->no_flush)
goto inval_attr_out;
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
inarg.lock_owner = fuse_lock_owner_id(fc, id);
@@ -475,6 +482,14 @@ static int fuse_flush(struct file *file, fl_owner_t id)
fc->no_flush = 1;
err = 0;
}
inval_attr_out:
/*
* In memory i_blocks is not maintained by fuse, if writeback cache is
* enabled, i_blocks from cached attr may not be accurate.
*/
if (!err && fc->writeback_cache)
fuse_invalidate_attr(inode);
return err;
}
@@ -712,6 +727,7 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc,
spin_unlock(&io->lock);
ia->ap.args.end = fuse_aio_complete_req;
ia->ap.args.may_block = io->should_dirty;
err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
if (err)
fuse_aio_complete_req(fc, &ia->ap.args, err);
@@ -1570,7 +1586,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc,
struct backing_dev_info *bdi = inode_to_bdi(inode);
int i;
list_del(&wpa->writepages_entry);
rb_erase(&wpa->writepages_entry, &fi->writepages);
for (i = 0; i < ap->num_pages; i++) {
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
@@ -1658,6 +1674,36 @@ __acquires(fi->lock)
}
}
static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
{
pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
WARN_ON(!wpa->ia.ap.num_pages);
while (*p) {
struct fuse_writepage_args *curr;
pgoff_t curr_index;
parent = *p;
curr = rb_entry(parent, struct fuse_writepage_args,
writepages_entry);
WARN_ON(curr->inode != wpa->inode);
curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
if (idx_from >= curr_index + curr->ia.ap.num_pages)
p = &(*p)->rb_right;
else if (idx_to < curr_index)
p = &(*p)->rb_left;
else
return (void) WARN_ON(true);
}
rb_link_node(&wpa->writepages_entry, parent, p);
rb_insert_color(&wpa->writepages_entry, root);
}
static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
int error)
{
@@ -1676,7 +1722,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
wpa->next = next->next;
next->next = NULL;
next->ia.ff = fuse_file_get(wpa->ia.ff);
list_add(&next->writepages_entry, &fi->writepages);
tree_insert(&fi->writepages, next);
/*
* Skip fuse_flush_writepages() to make it easy to crop requests
@@ -1811,7 +1857,7 @@ static int fuse_writepage_locked(struct page *page)
inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
spin_lock(&fi->lock);
list_add(&wpa->writepages_entry, &fi->writepages);
tree_insert(&fi->writepages, wpa);
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
@@ -1923,10 +1969,10 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
WARN_ON(new_ap->num_pages != 0);
spin_lock(&fi->lock);
list_del(&new_wpa->writepages_entry);
rb_erase(&new_wpa->writepages_entry, &fi->writepages);
old_wpa = fuse_find_writeback(fi, page->index, page->index);
if (!old_wpa) {
list_add(&new_wpa->writepages_entry, &fi->writepages);
tree_insert(&fi->writepages, new_wpa);
spin_unlock(&fi->lock);
return false;
}
@@ -2041,7 +2087,7 @@ static int fuse_writepages_fill(struct page *page,
wpa->inode = inode;
spin_lock(&fi->lock);
list_add(&wpa->writepages_entry, &fi->writepages);
tree_insert(&fi->writepages, wpa);
spin_unlock(&fi->lock);
data->wpa = wpa;
@@ -3235,13 +3281,11 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
return -EXDEV;
if (fc->writeback_cache) {
inode_lock(inode_in);
err = fuse_writeback_range(inode_in, pos_in, pos_in + len);
inode_unlock(inode_in);
if (err)
return err;
}
inode_lock(inode_in);
err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1);
inode_unlock(inode_in);
if (err)
return err;
inode_lock(inode_out);
@@ -3249,11 +3293,27 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (err)
goto out;
if (fc->writeback_cache) {
err = fuse_writeback_range(inode_out, pos_out, pos_out + len);
if (err)
goto out;
}
/*
* Write out dirty pages in the destination file before sending the COPY
* request to userspace. After the request is completed, truncate off
* pages (including partial ones) from the cache that have been copied,
* since these contain stale data at that point.
*
* This should be mostly correct, but if the COPY writes to partial
* pages (at the start or end) and the parts not covered by the COPY are
* written through a memory map after calling fuse_writeback_range(),
* then these partial page modifications will be lost on truncation.
*
* It is unlikely that someone would rely on such mixed style
* modifications. Yet this does give less guarantees than if the
* copying was performed with write(2).
*
* To fix this a i_mmap_sem style lock could be used to prevent new
* faults while the copy is ongoing.
*/
err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1);
if (err)
goto out;
if (is_unstable)
set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
@@ -3274,6 +3334,10 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (err)
goto out;
truncate_inode_pages_range(inode_out->i_mapping,
ALIGN_DOWN(pos_out, PAGE_SIZE),
ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);
if (fc->writeback_cache) {
fuse_write_update_size(inode_out, pos_out + outarg.size);
file_update_time(file_out);
@@ -3351,5 +3415,5 @@ void fuse_init_file_inode(struct inode *inode)
INIT_LIST_HEAD(&fi->queued_writes);
fi->writectr = 0;
init_waitqueue_head(&fi->page_waitq);
INIT_LIST_HEAD(&fi->writepages);
fi->writepages = RB_ROOT;
}

View File

@@ -111,7 +111,7 @@ struct fuse_inode {
wait_queue_head_t page_waitq;
/* List of writepage requestst (pending or sent) */
struct list_head writepages;
struct rb_root writepages;
};
/* readdir cache (directory only) */
@@ -249,6 +249,7 @@ struct fuse_args {
bool out_argvar:1;
bool page_zeroing:1;
bool page_replace:1;
bool may_block:1;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);

View File

@@ -321,6 +321,8 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
loff_t offset, loff_t len)
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
struct fuse_inode *fi;
struct inode *inode;
pgoff_t pg_start;
pgoff_t pg_end;
@@ -329,6 +331,11 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
if (!inode)
return -ENOENT;
fi = get_fuse_inode(inode);
spin_lock(&fi->lock);
fi->attr_version = atomic64_inc_return(&fc->attr_version);
spin_unlock(&fi->lock);
fuse_invalidate_attr(inode);
forget_all_cached_acls(inode);
if (offset >= 0) {
@@ -1113,7 +1120,7 @@ EXPORT_SYMBOL_GPL(fuse_dev_free);
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
{
struct fuse_dev *fud;
struct fuse_dev *fud = NULL;
struct fuse_conn *fc = get_fuse_conn_super(sb);
struct inode *root;
struct dentry *root_dentry;
@@ -1155,9 +1162,12 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
if (sb->s_user_ns != &init_user_ns)
sb->s_xattr = fuse_no_acl_xattr_handlers;
fud = fuse_dev_alloc_install(fc);
if (!fud)
goto err;
if (ctx->fudptr) {
err = -ENOMEM;
fud = fuse_dev_alloc_install(fc);
if (!fud)
goto err;
}
fc->dev = sb->s_dev;
fc->sb = sb;
@@ -1191,7 +1201,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
mutex_lock(&fuse_mutex);
err = -EINVAL;
if (*ctx->fudptr)
if (ctx->fudptr && *ctx->fudptr)
goto err_unlock;
err = fuse_ctl_add_conn(fc);
@@ -1200,7 +1210,8 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
list_add_tail(&fc->entry, &fuse_conn_list);
sb->s_root = root_dentry;
*ctx->fudptr = fud;
if (ctx->fudptr)
*ctx->fudptr = fud;
mutex_unlock(&fuse_mutex);
return 0;
@@ -1208,7 +1219,8 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
mutex_unlock(&fuse_mutex);
dput(root_dentry);
err_dev_free:
fuse_dev_free(fud);
if (fud)
fuse_dev_free(fud);
err:
return err;
}

View File

@@ -60,6 +60,12 @@ struct virtio_fs_forget {
struct virtio_fs_forget_req req;
};
struct virtio_fs_req_work {
struct fuse_req *req;
struct virtio_fs_vq *fsvq;
struct work_struct done_work;
};
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
struct fuse_req *req, bool in_flight);
@@ -485,19 +491,67 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
}
/* Work function for request completion */
static void virtio_fs_request_complete(struct fuse_req *req,
struct virtio_fs_vq *fsvq)
{
struct fuse_pqueue *fpq = &fsvq->fud->pq;
struct fuse_conn *fc = fsvq->fud->fc;
struct fuse_args *args;
struct fuse_args_pages *ap;
unsigned int len, i, thislen;
struct page *page;
/*
* TODO verify that server properly follows FUSE protocol
* (oh.uniq, oh.len)
*/
args = req->args;
copy_args_from_argbuf(args, req);
if (args->out_pages && args->page_zeroing) {
len = args->out_args[args->out_numargs - 1].size;
ap = container_of(args, typeof(*ap), args);
for (i = 0; i < ap->num_pages; i++) {
thislen = ap->descs[i].length;
if (len < thislen) {
WARN_ON(ap->descs[i].offset);
page = ap->pages[i];
zero_user_segment(page, len, thislen);
len = 0;
} else {
len -= thislen;
}
}
}
spin_lock(&fpq->lock);
clear_bit(FR_SENT, &req->flags);
spin_unlock(&fpq->lock);
fuse_request_end(fc, req);
spin_lock(&fsvq->lock);
dec_in_flight_req(fsvq);
spin_unlock(&fsvq->lock);
}
static void virtio_fs_complete_req_work(struct work_struct *work)
{
struct virtio_fs_req_work *w =
container_of(work, typeof(*w), done_work);
virtio_fs_request_complete(w->req, w->fsvq);
kfree(w);
}
static void virtio_fs_requests_done_work(struct work_struct *work)
{
struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
done_work);
struct fuse_pqueue *fpq = &fsvq->fud->pq;
struct fuse_conn *fc = fsvq->fud->fc;
struct virtqueue *vq = fsvq->vq;
struct fuse_req *req;
struct fuse_args_pages *ap;
struct fuse_req *next;
struct fuse_args *args;
unsigned int len, i, thislen;
struct page *page;
unsigned int len;
LIST_HEAD(reqs);
/* Collect completed requests off the virtqueue */
@@ -515,38 +569,20 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
/* End requests */
list_for_each_entry_safe(req, next, &reqs, list) {
/*
* TODO verify that server properly follows FUSE protocol
* (oh.uniq, oh.len)
*/
args = req->args;
copy_args_from_argbuf(args, req);
if (args->out_pages && args->page_zeroing) {
len = args->out_args[args->out_numargs - 1].size;
ap = container_of(args, typeof(*ap), args);
for (i = 0; i < ap->num_pages; i++) {
thislen = ap->descs[i].length;
if (len < thislen) {
WARN_ON(ap->descs[i].offset);
page = ap->pages[i];
zero_user_segment(page, len, thislen);
len = 0;
} else {
len -= thislen;
}
}
}
spin_lock(&fpq->lock);
clear_bit(FR_SENT, &req->flags);
list_del_init(&req->list);
spin_unlock(&fpq->lock);
fuse_request_end(fc, req);
spin_lock(&fsvq->lock);
dec_in_flight_req(fsvq);
spin_unlock(&fsvq->lock);
/* blocking async request completes in a worker context */
if (req->args->may_block) {
struct virtio_fs_req_work *w;
w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
w->fsvq = fsvq;
w->req = req;
schedule_work(&w->done_work);
} else {
virtio_fs_request_complete(req, fsvq);
}
}
}
@@ -1067,7 +1103,7 @@ static int virtio_fs_fill_super(struct super_block *sb)
err = -ENOMEM;
/* Allocate fuse_dev for hiprio and notification queues */
for (i = 0; i < VQ_REQUEST; i++) {
for (i = 0; i < fs->nvqs; i++) {
struct virtio_fs_vq *fsvq = &fs->vqs[i];
fsvq->fud = fuse_dev_alloc();
@@ -1075,18 +1111,15 @@ static int virtio_fs_fill_super(struct super_block *sb)
goto err_free_fuse_devs;
}
ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
/* virtiofs allocates and installs its own fuse devices */
ctx.fudptr = NULL;
err = fuse_fill_super_common(sb, &ctx);
if (err < 0)
goto err_free_fuse_devs;
fc = fs->vqs[VQ_REQUEST].fud->fc;
for (i = 0; i < fs->nvqs; i++) {
struct virtio_fs_vq *fsvq = &fs->vqs[i];
if (i == VQ_REQUEST)
continue; /* already initialized */
fuse_dev_install(fsvq->fud, fc);
}