Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull the big VFS changes from Al Viro:
 "This one is *big* and changes quite a few things around VFS. What's in there:

  - the first of two really major architecture changes - death to open
    intents. The former is finally there; it was very long in making, but
    with Miklos getting through really hard and messy final push in
    fs/namei.c, we finally have it. Unlike his variant, this one doesn't
    introduce struct opendata; what we have instead is ->atomic_open()
    taking preallocated struct file * and passing everything via its
    fields.

    Instead of returning struct file *, it returns -E... on error, 0 on
    success and 1 in "deal with it yourself" case (e.g. symlink found on
    server, etc.). See comments before fs/namei.c:atomic_open().

    That made a lot of goodies finally possible and quite a few are in
    that pile: ->lookup(), ->d_revalidate() and ->create() do not get
    struct nameidata * anymore; ->lookup() and ->d_revalidate() get
    lookup flags instead, ->create() gets "do we want it exclusive" flag.

    With the introduction of new helper (kern_path_locked()) we are rid
    of all struct nameidata instances outside of fs/namei.c; it's still
    visible in namei.h, but not for long. Come the next cycle,
    declaration will move either to fs/internal.h or to fs/namei.c
    itself. [me, miklos, hch]

  - The second major change: behaviour of final fput(). Now we have
    __fput() done without any locks held by caller *and* not from deep in
    call stack.

    That obviously lifts a lot of constraints on the locking in there.
    Moreover, it's legal now to call fput() from atomic contexts (which
    has immediately simplified life for aio.c). We also don't need
    anti-recursion logics in __scm_destroy() anymore.

    There is a price, though - the damn thing has become partially
    asynchronous. For fput() from normal process we are guaranteed that
    pending __fput() will be done before the caller returns to userland,
    exits or gets stopped for ptrace.

    For kernel threads and atomic contexts it's done via schedule_work(),
    so theoretically we might need a way to make sure it's finished; so
    far only one such place had been found, but there might be more.

    There's flush_delayed_fput() (do all pending __fput()) and there's
    __fput_sync() (fput() analog doing __fput() immediately). I hope we
    won't need them often; see warnings in fs/file_table.c for details.
    [me, based on task_work series from Oleg merged last cycle]

  - sync series from Jan

  - large part of "death to sync_supers()" work from Artem; the only bits
    missing here are exofs and ext4 ones. As far as I understand, those
    are going via the exofs and ext4 trees resp.; once they are in, we
    can put ->write_super() to the rest, along with the thread calling
    it.

  - preparatory bits from unionmount series (from dhowells).

  - assorted cleanups and fixes all over the place, as usual.

  This is not the last pile for this cycle; there's at least jlayton's
  ESTALE work and fsfreeze series (the latter - in dire need of fixes, so
  I'm not sure it'll make the cut this cycle). I'll probably throw
  symlink/hardlink restrictions stuff from Kees into the next pile, too.
  Plus there's a lot of misc patches I hadn't thrown into that one - it's
  large enough as it is..."
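To make the signature changes described above concrete, here is a rough sketch of how the affected methods look after this pull, distilled from the message and the hunks further down. The struct below is illustrative only, not a verbatim copy of include/linux/fs.h or include/linux/dcache.h, so treat the exact parameter lists as approximate.

#include <stdbool.h>

typedef unsigned short umode_t;		/* matches the kernel's own typedef */

struct inode;
struct dentry;
struct file;

/* Illustrative bundle of the reworked methods; in the kernel, lookup,
 * create and atomic_open live in inode_operations and d_revalidate in
 * dentry_operations. */
struct vfs_methods_sketch {
	/* ->lookup() and ->d_revalidate() get lookup flags instead of a
	 * struct nameidata *; ->create() gets a "do we want it exclusive"
	 * bool. */
	struct dentry *(*lookup)(struct inode *dir, struct dentry *dentry,
				 unsigned int flags);
	int (*d_revalidate)(struct dentry *dentry, unsigned int flags);
	int (*create)(struct inode *dir, struct dentry *dentry,
		      umode_t mode, bool excl);

	/* ->atomic_open() takes a preallocated struct file * and passes
	 * everything via its fields; it returns -E... on error, 0 on
	 * success and 1 in the "deal with it yourself" case (e.g. symlink
	 * found on server) -- see the comments before
	 * fs/namei.c:atomic_open(). */
	int (*atomic_open)(struct inode *dir, struct dentry *dentry,
			   struct file *file, unsigned int open_flag,
			   umode_t create_mode, int *opened);
};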
* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (127 commits)
  ext4: switch EXT4_IOC_RESIZE_FS to mnt_want_write_file()
  btrfs: switch btrfs_ioctl_balance() to mnt_want_write_file()
  switch dentry_open() to struct path, make it grab references itself
  spufs: shift dget/mntget towards dentry_open()
  zoran: don't bother with struct file * in zoran_map
  ecryptfs: don't reinvent the wheels, please - use struct completion
  don't expose I_NEW inodes via dentry->d_inode
  tidy up namei.c a bit
  unobfuscate follow_up() a bit
  ext3: pass custom EOF to generic_file_llseek_size()
  ext4: use core vfs llseek code for dir seeks
  vfs: allow custom EOF in generic_file_llseek code
  vfs: Avoid unnecessary WB_SYNC_NONE writeback during sys_sync and reorder sync passes
  vfs: Remove unnecessary flushing of block devices
  vfs: Make sys_sync writeout also block device inodes
  vfs: Create function for iterating over block devices
  vfs: Reorder operations during sys_sync
  quota: Move quota syncing to ->sync_fs method
  quota: Split dquot_quota_sync() to writeback and cache flushing part
  vfs: Move noop_backing_dev_info check from sync into writeback
  ...
@@ -595,7 +595,7 @@ void audit_trim_trees(void)
 
 		root_mnt = collect_mounts(&path);
 		path_put(&path);
-		if (!root_mnt)
+		if (IS_ERR(root_mnt))
 			goto skip_it;
 
 		spin_lock(&hash_lock);
@@ -669,8 +669,8 @@ int audit_add_tree_rule(struct audit_krule *rule)
 		goto Err;
 	mnt = collect_mounts(&path);
 	path_put(&path);
-	if (!mnt) {
-		err = -ENOMEM;
+	if (IS_ERR(mnt)) {
+		err = PTR_ERR(mnt);
 		goto Err;
 	}
 
@@ -719,8 +719,8 @@ int audit_tag_tree(char *old, char *new)
 		return err;
 	tagged = collect_mounts(&path2);
 	path_put(&path2);
-	if (!tagged)
-		return -ENOMEM;
+	if (IS_ERR(tagged))
+		return PTR_ERR(tagged);
 
 	err = kern_path(old, 0, &path1);
 	if (err) {
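All three audit hunks above follow from the same convention change: collect_mounts() now reports failure as ERR_PTR(-errno) rather than NULL, so callers propagate the real error instead of assuming -ENOMEM. Below is a minimal stand-alone sketch of that caller pattern; collect_mounts_sketch() is a made-up stand-in, and IS_ERR()/PTR_ERR() are re-implemented here only so the example compiles outside the kernel (the real macros live in include/linux/err.h).

#include <stdio.h>
#include <errno.h>

/* Userspace copy of the pointer-encoded error convention, for the sketch
 * only: errors are stored as pointer values in the top 4095 addresses. */
#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

struct vfsmount;			/* opaque in this sketch */

/* Hypothetical stand-in for the reworked collect_mounts(): failure is now
 * reported as ERR_PTR(-errno), never as NULL. */
static struct vfsmount *collect_mounts_sketch(int fail_with)
{
	if (fail_with)
		return ERR_PTR(-fail_with);
	return (struct vfsmount *)(unsigned long)0x1000;	/* fake success */
}

int main(void)
{
	struct vfsmount *mnt = collect_mounts_sketch(ENOMEM);

	/* Old callers:  if (!mnt) err = -ENOMEM;   -- the real cause was lost.
	 * New callers:  propagate whatever error was encoded in the pointer. */
	if (IS_ERR(mnt))
		printf("collect_mounts failed: %ld\n", PTR_ERR(mnt));
	return 0;
}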
@@ -355,34 +355,15 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 /* Get path information necessary for adding watches. */
 static int audit_get_nd(struct audit_watch *watch, struct path *parent)
 {
-	struct nameidata nd;
-	struct dentry *d;
-	int err;
-
-	err = kern_path_parent(watch->path, &nd);
-	if (err)
-		return err;
-
-	if (nd.last_type != LAST_NORM) {
-		path_put(&nd.path);
-		return -EINVAL;
-	}
-
-	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
-	if (IS_ERR(d)) {
-		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-		path_put(&nd.path);
+	struct dentry *d = kern_path_locked(watch->path, parent);
+	if (IS_ERR(d))
 		return PTR_ERR(d);
-	}
+	mutex_unlock(&parent->dentry->d_inode->i_mutex);
 	if (d->d_inode) {
 		/* update watch filter fields */
 		watch->dev = d->d_inode->i_sb->s_dev;
 		watch->ino = d->d_inode->i_ino;
 	}
-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-
-	*parent = nd.path;
 	dput(d);
 	return 0;
 }
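For context on the new helper mentioned in the merge message: judging from the audit_get_nd() rewrite above, kern_path_locked() resolves everything but the last component of the path into *parent, takes the parent directory's i_mutex, and returns the (possibly negative) dentry of the last component, or ERR_PTR() on failure. A hedged, kernel-context sketch of that calling convention, modelled on the hunk above (example_lookup_last() and watch_path are illustrative names, not kernel symbols):

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/namei.h>

static int example_lookup_last(const char *watch_path, struct path *parent)
{
	/* Resolves the parent of watch_path into *parent, takes the parent
	 * directory's i_mutex, and returns the final component's dentry
	 * (possibly negative). On failure it returns ERR_PTR() and, as the
	 * error path of the hunk above suggests, leaves nothing locked. */
	struct dentry *d = kern_path_locked(watch_path, parent);

	if (IS_ERR(d))
		return PTR_ERR(d);

	if (d->d_inode) {
		/* ... inspect or record the existing inode here ... */
	}

	mutex_unlock(&parent->dentry->d_inode->i_mutex);
	dput(d);
	return 0;	/* the caller still holds a reference on *parent */
}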
@@ -822,7 +822,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
  */
 
 static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
-static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
 static int cgroup_populate_dir(struct cgroup *cgrp);
 static const struct inode_operations cgroup_dir_inode_operations;
@@ -1587,7 +1587,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	opts.new_root = new_root;
 
 	/* Locate an existing or new sb for this hierarchy */
-	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
+	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
 		cgroup_drop_root(opts.new_root);
@@ -2570,7 +2570,7 @@ static const struct inode_operations cgroup_dir_inode_operations = {
 	.rename = cgroup_rename,
 };
 
-static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
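The sget() change above is another tree-wide signature change in this series: sget() grows a flags argument (cgroup passes 0), presumably so the mount flags are available while the superblock is being set up. The prototype below is a paraphrase for reference, not a copy of the real declaration in the kernel headers:

struct super_block;
struct file_system_type;

/* Approximate new prototype: the fourth argument carries the mount flags. */
struct super_block *sget(struct file_system_type *type,
			 int (*test)(struct super_block *, void *),
			 int (*set)(struct super_block *, void *),
			 int flags, void *data);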
@@ -953,14 +953,11 @@ void do_exit(long code)
 	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
-	 * an exiting task cleaning up the robust pi futexes, and in
-	 * task_work_add() to avoid the race with exit_task_work().
+	 * an exiting task cleaning up the robust pi futexes.
 	 */
 	smp_mb();
 	raw_spin_unlock_wait(&tsk->pi_lock);
 
-	exit_task_work(tsk);
-
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 			current->comm, task_pid_nr(current),
@@ -995,6 +992,7 @@ void do_exit(long code)
 	exit_shm(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
+	exit_task_work(tsk);
 	check_stack_usage();
 	exit_thread();
 
@@ -1420,7 +1420,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->thread_group);
-	INIT_HLIST_HEAD(&p->task_works);
+	p->task_works = NULL;
 
 	/* Now that the task is set up, run cgroup callbacks if
 	 * necessary. We need to run them before the task is visible
@@ -781,7 +781,7 @@ static void wake_threads_waitq(struct irq_desc *desc)
 		wake_up(&desc->wait_for_threads);
 }
 
-static void irq_thread_dtor(struct task_work *unused)
+static void irq_thread_dtor(struct callback_head *unused)
 {
 	struct task_struct *tsk = current;
 	struct irq_desc *desc;
@@ -813,7 +813,7 @@ static void irq_thread_dtor(struct task_work *unused)
  */
 static int irq_thread(void *data)
 {
-	struct task_work on_exit_work;
+	struct callback_head on_exit_work;
 	static const struct sched_param param = {
 		.sched_priority = MAX_USER_RT_PRIO/2,
 	};
@@ -830,7 +830,7 @@ static int irq_thread(void *data)
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
 
-	init_task_work(&on_exit_work, irq_thread_dtor, NULL);
+	init_task_work(&on_exit_work, irq_thread_dtor);
 	task_work_add(current, &on_exit_work, false);
 
 	while (!irq_wait_for_interrupt(action)) {
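The irq hunks above show the caller-visible side of the task_work rework further down: the old struct task_work is replaced by the generic struct callback_head, and init_task_work() loses its data argument. Roughly, and paraphrased rather than copied from include/linux/types.h and include/linux/task_work.h, the pieces look like this; users that need extra data embed the callback_head in a larger structure and recover it with container_of() in the callback:

/* Paraphrase of the relevant pieces (not verbatim kernel headers). */
struct callback_head {
	struct callback_head *next;
	void (*func)(struct callback_head *head);
};
typedef void (*task_work_func_t)(struct callback_head *);

/* init_task_work() now only records the callback; the old third "data"
 * argument is gone. */
static inline void init_task_work(struct callback_head *twork,
				  task_work_func_t func)
{
	twork->func = func;
}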
@@ -1971,6 +1971,13 @@ static void ptrace_do_notify(int signr, int exit_code, int why)
 void ptrace_notify(int exit_code)
 {
 	BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
+	if (unlikely(current->task_works)) {
+		if (test_and_clear_ti_thread_flag(current_thread_info(),
+						  TIF_NOTIFY_RESUME)) {
+			smp_mb__after_clear_bit();
+			task_work_run();
+		}
+	}
 
 	spin_lock_irq(&current->sighand->siglock);
 	ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED);
@@ -2191,6 +2198,14 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
 	struct signal_struct *signal = current->signal;
 	int signr;
 
+	if (unlikely(current->task_works)) {
+		if (test_and_clear_ti_thread_flag(current_thread_info(),
+						  TIF_NOTIFY_RESUME)) {
+			smp_mb__after_clear_bit();
+			task_work_run();
+		}
+	}
+
 	if (unlikely(uprobe_deny_signal()))
 		return 0;
 
@@ -3,82 +3,78 @@
 #include <linux/tracehook.h>
 
 int
-task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
+task_work_add(struct task_struct *task, struct callback_head *twork, bool notify)
 {
+	struct callback_head *last, *first;
 	unsigned long flags;
-	int err = -ESRCH;
 
-#ifndef TIF_NOTIFY_RESUME
-	if (notify)
-		return -ENOTSUPP;
-#endif
 	/*
-	 * We must not insert the new work if the task has already passed
-	 * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait()
-	 * and check PF_EXITING under pi_lock.
+	 * Not inserting the new work if the task has already passed
+	 * exit_task_work() is the responsibility of callers.
 	 */
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	if (likely(!(task->flags & PF_EXITING))) {
-		hlist_add_head(&twork->hlist, &task->task_works);
-		err = 0;
-	}
+	last = task->task_works;
+	first = last ? last->next : twork;
+	twork->next = first;
+	if (last)
+		last->next = twork;
+	task->task_works = twork;
 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	/* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
-	if (likely(!err) && notify)
+	if (notify)
 		set_notify_resume(task);
-	return err;
+	return 0;
 }
 
-struct task_work *
+struct callback_head *
 task_work_cancel(struct task_struct *task, task_work_func_t func)
 {
 	unsigned long flags;
-	struct task_work *twork;
-	struct hlist_node *pos;
+	struct callback_head *last, *res = NULL;
 
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	hlist_for_each_entry(twork, pos, &task->task_works, hlist) {
-		if (twork->func == func) {
-			hlist_del(&twork->hlist);
-			goto found;
+	last = task->task_works;
+	if (last) {
+		struct callback_head *q = last, *p = q->next;
+		while (1) {
+			if (p->func == func) {
+				q->next = p->next;
+				if (p == last)
+					task->task_works = q == p ? NULL : q;
+				res = p;
+				break;
+			}
+			if (p == last)
+				break;
+			q = p;
+			p = q->next;
 		}
 	}
-	twork = NULL;
- found:
 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-
-	return twork;
+	return res;
 }
 
 void task_work_run(void)
 {
 	struct task_struct *task = current;
-	struct hlist_head task_works;
-	struct hlist_node *pos;
+	struct callback_head *p, *q;
 
-	raw_spin_lock_irq(&task->pi_lock);
-	hlist_move_list(&task->task_works, &task_works);
-	raw_spin_unlock_irq(&task->pi_lock);
+	while (1) {
+		raw_spin_lock_irq(&task->pi_lock);
+		p = task->task_works;
+		task->task_works = NULL;
+		raw_spin_unlock_irq(&task->pi_lock);
 
-	if (unlikely(hlist_empty(&task_works)))
-		return;
-	/*
-	 * We use hlist to save the space in task_struct, but we want fifo.
-	 * Find the last entry, the list should be short, then process them
-	 * in reverse order.
-	 */
-	for (pos = task_works.first; pos->next; pos = pos->next)
-		;
+		if (unlikely(!p))
+			return;
 
-	for (;;) {
-		struct hlist_node **pprev = pos->pprev;
-		struct task_work *twork = container_of(pos, struct task_work,
-							hlist);
-		twork->func(twork);
-
-		if (pprev == &task_works.first)
-			break;
-		pos = container_of(pprev, struct hlist_node, next);
+		q = p->next; /* head */
+		p->next = NULL; /* cut it */
+		while (q) {
+			p = q->next;
+			q->func(q);
+			q = p;
+		}
 	}
 }
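The data-structure change in this last hunk is easy to miss: task->task_works stops being an hlist and becomes a singly linked, circular list in which task_works points at the most recently added entry and that entry's ->next points back to the oldest one. That gives O(1) append while keeping FIFO run order: task_work_run() detaches the whole ring, cuts it open at the tail, and walks it from the head, which removes the reverse walk the old hlist version needed to recover FIFO order. Below is a small stand-alone model of that list discipline (plain C, no locking, illustrative names only):

#include <stdio.h>

/* Stand-alone model of the circular "tail points to head" list used by
 * the new task_work_add()/task_work_run() above. Names are illustrative. */
struct cb {
	struct cb *next;
	void (*func)(struct cb *);
};

static struct cb *works;	/* plays the role of task->task_works (the tail) */

static void add_work(struct cb *w)
{
	struct cb *last = works;

	w->next = last ? last->next : w;	/* new entry points at the head */
	if (last)
		last->next = w;			/* old tail points at the new entry */
	works = w;				/* new entry becomes the tail */
}

static void run_works(void)
{
	struct cb *p = works, *q;

	works = NULL;
	if (!p)
		return;
	q = p->next;		/* q = head of the ring */
	p->next = NULL;		/* cut the ring open at the tail */
	while (q) {		/* walk head -> tail: FIFO order */
		p = q->next;
		q->func(q);
		q = p;
	}
}

static void hello(struct cb *self)
{
	printf("work %p ran\n", (void *)self);
}

int main(void)
{
	struct cb a = { .func = hello }, b = { .func = hello };

	add_work(&a);
	add_work(&b);
	run_works();		/* runs a first, then b */
	return 0;
}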