Merge branch 'for-linus-1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs update from Al Viro:
"Part one:
- struct filename-related cleanups
- saner iov_iter_init() replacements (and switching the syscalls to
use of those)
- ntfs switch to ->write_iter() (Anton)
- aio cleanups and splitting iocb into common and async parts
(Christoph)
- assorted fixes (me, bfields, Andrew Elble)
There's a lot more, including the completion of switchover to
->{read,write}_iter(), d_inode/d_backing_inode annotations, f_flags
race fixes, etc, but that goes after #for-davem merge. David has
pulled it, and once it's in I'll send the next vfs pull request"
* 'for-linus-1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (35 commits)
sg_start_req(): use import_iovec()
sg_start_req(): make sure that there's not too many elements in iovec
blk_rq_map_user(): use import_single_range()
sg_io(): use import_iovec()
process_vm_access: switch to {compat_,}import_iovec()
switch keyctl_instantiate_key_common() to iov_iter
switch {compat_,}do_readv_writev() to {compat_,}import_iovec()
aio_setup_vectored_rw(): switch to {compat_,}import_iovec()
vmsplice_to_user(): switch to import_iovec()
kill aio_setup_single_vector()
aio: simplify arguments of aio_setup_..._rw()
aio: lift iov_iter_init() into aio_setup_..._rw()
lift iov_iter into {compat_,}do_readv_writev()
NFS: fix BUG() crash in notify_change() with patch to chown_common()
dcache: return -ESTALE not -EBUSY on distributed fs race
NTFS: Version 2.1.32 - Update file write from aio_write to write_iter.
VFS: Add iov_iter_fault_in_multipages_readable()
drop bogus check in file_open_root()
switch security_inode_getattr() to struct path *
constify tomoyo_realpath_from_path()
...
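
Several of the commits above replace open-coded rw_copy_check_uvector() +
iov_iter_init() pairs with the new import_iovec() helper. For reference, the
calling convention a converted caller ends up with looks roughly like this (a
minimal sketch, not taken verbatim from any one commit; uvec and nr_segs stand
in for the caller's user-supplied vector):

	struct iovec inline_vecs[UIO_FASTIOV], *iov = inline_vecs;
	struct iov_iter iter;
	int ret;

	/* copy in and validate the user iovec, then bind it to iter; on
	 * success iov is replaced with a kmalloc'ed array if inline_vecs
	 * was too small, or set to NULL if the on-stack array was used */
	ret = import_iovec(READ, uvec, nr_segs, UIO_FASTIOV, &iov, &iter);
	if (ret < 0)
		return ret;

	/* ... do the I/O through &iter ... */

	kfree(iov);	/* safe either way: iov is NULL in the fast case */

The point of the helper is that every caller gets the same validation and
fast-vector handling, and the unconditional kfree() replaces the
easy-to-get-wrong "if (iov != inline_vecs)" cleanup seen in the diff below.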
---
 fs/aio.c | 192 ++++++++++++++++++++++++++++++++----------------------------
@@ -151,6 +151,38 @@ struct kioctx {
 	unsigned		id;
 };
 
+/*
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
+ */
+#define KIOCB_CANCELLED		((void *) (~0ULL))
+
+struct aio_kiocb {
+	struct kiocb		common;
+
+	struct kioctx		*ki_ctx;
+	kiocb_cancel_fn		*ki_cancel;
+
+	struct iocb __user	*ki_user_iocb;	/* user's aiocb */
+	__u64			ki_user_data;	/* user's data for completion */
+
+	struct list_head	ki_list;	/* the aio core uses this
+						 * for cancellation */
+
+	/*
+	 * If the aio_resfd field of the userspace iocb is not zero,
+	 * this is the underlying eventfd context to deliver events to.
+	 */
+	struct eventfd_ctx	*ki_eventfd;
+};
+
 /*------ sysctl variables----*/
 static DEFINE_SPINLOCK(aio_nr_lock);
 unsigned long aio_nr;		/* current system wide number of aio requests */
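
Since the generic struct kiocb is embedded first in struct aio_kiocb, the aio
core converts between the two views with container_of(), as the hunks below do
inline. A hypothetical helper (not part of the patch) would look like:

	static inline struct aio_kiocb *to_aio_kiocb(struct kiocb *kiocb)
	{
		return container_of(kiocb, struct aio_kiocb, common);
	}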
@@ -220,7 +252,7 @@ static int __init aio_setup(void)
 	if (IS_ERR(aio_mnt))
 		panic("Failed to create aio fs mount.");
 
-	kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
 	pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -484,8 +516,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 {
+	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
 	struct kioctx *ctx = req->ki_ctx;
 	unsigned long flags;
 
@@ -500,7 +533,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kiocb *kiocb)
+static int kiocb_cancel(struct aio_kiocb *kiocb)
 {
 	kiocb_cancel_fn *old, *cancel;
 
@@ -518,7 +551,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
 		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
 	} while (cancel != old);
 
-	return cancel(kiocb);
+	return cancel(&kiocb->common);
 }
 
 static void free_ioctx(struct work_struct *work)
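
The cmpxchg() in the hunk above is what the KIOCB_CANCELLED comment refers to:
whoever atomically swaps KIOCB_CANCELLED into ki_cancel first owns the
callback, so completion and cancellation can race without a lock. Reassembled,
the claim loop reads roughly as follows (the initial load is outside the hunk;
ACCESS_ONCE is what the code of this era used):

	cancel = ACCESS_ONCE(kiocb->ki_cancel);
	do {
		if (!cancel || cancel == KIOCB_CANCELLED)
			return -EINVAL;	/* never set, or already claimed */
		old = cancel;
		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
	} while (cancel != old);	/* retry if someone raced us */

	return cancel(&kiocb->common);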
@@ -554,13 +587,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 static void free_ioctx_users(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, users);
-	struct kiocb *req;
+	struct aio_kiocb *req;
 
 	spin_lock_irq(&ctx->ctx_lock);
 
 	while (!list_empty(&ctx->active_reqs)) {
 		req = list_first_entry(&ctx->active_reqs,
-				       struct kiocb, ki_list);
+				       struct aio_kiocb, ki_list);
 
 		list_del_init(&req->ki_list);
 		kiocb_cancel(req);
@@ -786,22 +819,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 	return 0;
 }
 
-/* wait_on_sync_kiocb:
- *	Waits on the given sync kiocb to complete.
- */
-ssize_t wait_on_sync_kiocb(struct kiocb *req)
-{
-	while (!req->ki_ctx) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (req->ki_ctx)
-			break;
-		io_schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-	return req->ki_user_data;
-}
-EXPORT_SYMBOL(wait_on_sync_kiocb);
-
 /*
  * exit_aio: called when the last user of mm goes away.  At this point, there is
  * no way for any new requests to be submited or any of the io_* syscalls to be
@@ -956,9 +973,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
  *	Allocate a slot for an aio request.
  * Returns NULL if no requests are free.
  */
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 {
-	struct kiocb *req;
+	struct aio_kiocb *req;
 
 	if (!get_reqs_available(ctx)) {
 		user_refill_reqs_available(ctx);
@@ -979,10 +996,10 @@ out_put:
 	return NULL;
 }
 
-static void kiocb_free(struct kiocb *req)
+static void kiocb_free(struct aio_kiocb *req)
 {
-	if (req->ki_filp)
-		fput(req->ki_filp);
+	if (req->common.ki_filp)
+		fput(req->common.ki_filp);
 	if (req->ki_eventfd != NULL)
 		eventfd_ctx_put(req->ki_eventfd);
 	kmem_cache_free(kiocb_cachep, req);
@@ -1018,8 +1035,9 @@ out:
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  */
-void aio_complete(struct kiocb *iocb, long res, long res2)
+static void aio_complete(struct kiocb *kiocb, long res, long res2)
 {
+	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
@@ -1033,13 +1051,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	 *    ref, no other paths have a way to get another ref
 	 *  - the sync task helpfully left a reference to itself in the iocb
 	 */
-	if (is_sync_kiocb(iocb)) {
-		iocb->ki_user_data = res;
-		smp_wmb();
-		iocb->ki_ctx = ERR_PTR(-EXDEV);
-		wake_up_process(iocb->ki_obj.tsk);
-		return;
-	}
+	BUG_ON(is_sync_kiocb(kiocb));
 
 	if (iocb->ki_list.next) {
 		unsigned long flags;
@@ -1065,7 +1077,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
-	event->obj = (u64)(unsigned long)iocb->ki_obj.user;
+	event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
 	event->data = iocb->ki_user_data;
 	event->res = res;
 	event->res2 = res2;
@@ -1074,7 +1086,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 
 	pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
-		 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
+		 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
 		 res, res2);
 
 	/* after flagging the request as done, we
@@ -1121,7 +1133,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	percpu_ref_put(&ctx->reqs);
 }
-EXPORT_SYMBOL(aio_complete);
 
 /* aio_read_events_ring
  *	Pull an event off of the ioctx's event ring.  Returns the number of
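
aio_complete() can simply BUG_ON(is_sync_kiocb(kiocb)) because, after the
split, synchronous kiocbs are completed in place by generic code and never
reach the aio core (which is also why wait_on_sync_kiocb() is deleted above).
A sync kiocb is now just one without a completion callback; the helper's shape
after this series is roughly:

	static inline bool is_sync_kiocb(struct kiocb *kiocb)
	{
		return kiocb->ki_complete == NULL;
	}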
@@ -1349,46 +1360,19 @@ typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
 			    unsigned long, loff_t);
 typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
 
-static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
-				     int rw, char __user *buf,
-				     unsigned long *nr_segs,
-				     struct iovec **iovec,
-				     bool compat)
+static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
+				 struct iovec **iovec,
+				 bool compat,
+				 struct iov_iter *iter)
 {
-	ssize_t ret;
-
-	*nr_segs = kiocb->ki_nbytes;
-
 #ifdef CONFIG_COMPAT
 	if (compat)
-		ret = compat_rw_copy_check_uvector(rw,
+		return compat_import_iovec(rw,
 				(struct compat_iovec __user *)buf,
-				*nr_segs, UIO_FASTIOV, *iovec, iovec);
-	else
+				len, UIO_FASTIOV, iovec, iter);
 #endif
-	ret = rw_copy_check_uvector(rw,
-				(struct iovec __user *)buf,
-				*nr_segs, UIO_FASTIOV, *iovec, iovec);
-	if (ret < 0)
-		return ret;
-
-	/* ki_nbytes now reflect bytes instead of segs */
-	kiocb->ki_nbytes = ret;
-	return 0;
-}
-
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
-				       int rw, char __user *buf,
-				       unsigned long *nr_segs,
-				       struct iovec *iovec)
-{
-	if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
-		return -EFAULT;
-
-	iovec->iov_base = buf;
-	iovec->iov_len = kiocb->ki_nbytes;
-	*nr_segs = 1;
-	return 0;
+	return import_iovec(rw, (struct iovec __user *)buf,
+			    len, UIO_FASTIOV, iovec, iter);
 }
 
 /*
@@ -1396,11 +1380,10 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
  *	Performs the initial checks and io submission.
  */
 static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
-			    char __user *buf, bool compat)
+			    char __user *buf, size_t len, bool compat)
 {
 	struct file *file = req->ki_filp;
 	ssize_t ret;
-	unsigned long nr_segs;
 	int rw;
 	fmode_t mode;
 	aio_rw_op *rw_op;
@@ -1431,21 +1414,22 @@ rw_common:
 		if (!rw_op && !iter_op)
 			return -EINVAL;
 
-		ret = (opcode == IOCB_CMD_PREADV ||
-		       opcode == IOCB_CMD_PWRITEV)
-			? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
-						&iovec, compat)
-			: aio_setup_single_vector(req, rw, buf, &nr_segs,
-						  iovec);
+		if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
+			ret = aio_setup_vectored_rw(rw, buf, len,
+						&iovec, compat, &iter);
+		else {
+			ret = import_single_range(rw, buf, len, iovec, &iter);
+			iovec = NULL;
+		}
 		if (!ret)
-			ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+			ret = rw_verify_area(rw, file, &req->ki_pos,
+					     iov_iter_count(&iter));
 		if (ret < 0) {
-			if (iovec != inline_vecs)
-				kfree(iovec);
+			kfree(iovec);
 			return ret;
 		}
 
-		req->ki_nbytes = ret;
+		len = ret;
 
 		/* XXX: move/kill - rw_verify_area()? */
 		/* This matches the pread()/pwrite() logic */
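
import_single_range() is the single-buffer counterpart of import_iovec(): it
checks one user pointer/length pair and binds the iter to it without
allocating anything, which is why the caller above sets iovec = NULL so the
shared kfree(iovec) becomes a no-op. Its shape is roughly (a sketch of the
lib/iov_iter.c helper added by this series):

	int import_single_range(int rw, void __user *buf, size_t len,
				struct iovec *iov, struct iov_iter *i)
	{
		if (len > MAX_RW_COUNT)
			len = MAX_RW_COUNT;
		if (unlikely(!access_ok(!rw, buf, len)))
			return -EFAULT;

		iov->iov_base = buf;
		iov->iov_len = len;
		iov_iter_init(i, rw, iov, 1, len);
		return 0;
	}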
@@ -1458,14 +1442,14 @@ rw_common:
 			file_start_write(file);
 
 		if (iter_op) {
-			iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
 			ret = iter_op(req, &iter);
 		} else {
-			ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+			ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos);
 		}
 
 		if (rw == WRITE)
 			file_end_write(file);
+		kfree(iovec);
 		break;
 
 	case IOCB_CMD_FDSYNC:
@@ -1487,9 +1471,6 @@ rw_common:
 		return -EINVAL;
 	}
 
-	if (iovec != inline_vecs)
-		kfree(iovec);
-
 	if (ret != -EIOCBQUEUED) {
 		/*
 		 * There's no easy way to restart the syscall since other AIO's
@@ -1508,7 +1489,7 @@ rw_common:
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			 struct iocb *iocb, bool compat)
 {
-	struct kiocb *req;
+	struct aio_kiocb *req;
 	ssize_t ret;
 
 	/* enforce forwards compatibility on users */
@@ -1531,11 +1512,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	if (unlikely(!req))
 		return -EAGAIN;
 
-	req->ki_filp = fget(iocb->aio_fildes);
-	if (unlikely(!req->ki_filp)) {
+	req->common.ki_filp = fget(iocb->aio_fildes);
+	if (unlikely(!req->common.ki_filp)) {
 		ret = -EBADF;
 		goto out_put_req;
 	}
+	req->common.ki_pos = iocb->aio_offset;
+	req->common.ki_complete = aio_complete;
+	req->common.ki_flags = 0;
 
 	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
 		/*
@@ -1550,6 +1534,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			req->ki_eventfd = NULL;
 			goto out_put_req;
 		}
+
+		req->common.ki_flags |= IOCB_EVENTFD;
 	}
 
 	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1558,13 +1544,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		goto out_put_req;
 	}
 
-	req->ki_obj.user = user_iocb;
+	req->ki_user_iocb = user_iocb;
 	req->ki_user_data = iocb->aio_data;
-	req->ki_pos = iocb->aio_offset;
-	req->ki_nbytes = iocb->aio_nbytes;
 
-	ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+	ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
 			   (char __user *)(unsigned long)iocb->aio_buf,
+			   iocb->aio_nbytes,
 			   compat);
 	if (ret)
 		goto out_put_req;
@@ -1651,10 +1636,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 /* lookup_kiocb
  *	Finds a given iocb for cancellation.
  */
-static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
-				  u32 key)
+static struct aio_kiocb *
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
 {
-	struct list_head *pos;
+	struct aio_kiocb *kiocb;
 
 	assert_spin_locked(&ctx->ctx_lock);
 
@@ -1662,9 +1647,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
 		return NULL;
 
 	/* TODO: use a hash or array, this sucks. */
-	list_for_each(pos, &ctx->active_reqs) {
-		struct kiocb *kiocb = list_kiocb(pos);
-		if (kiocb->ki_obj.user == iocb)
+	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+		if (kiocb->ki_user_iocb == iocb)
 			return kiocb;
 	}
 	return NULL;
@@ -1684,7 +1668,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 		struct io_event __user *, result)
 {
 	struct kioctx *ctx;
-	struct kiocb *kiocb;
+	struct aio_kiocb *kiocb;
 	u32 key;
 	int ret;
 
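
With ki_complete wired up at submission time (see the io_submit_one() hunk
above), aio_complete() can become static: code finishing an async kiocb now
goes through the method pointer instead of calling into aio directly.
Schematically, at the end of an async I/O path:

	/* sketch; a real caller also handles the synchronous case */
	if (!is_sync_kiocb(kiocb))
		kiocb->ki_complete(kiocb, res, res2);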