Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull the big VFS changes from Al Viro:
 "This one is *big* and changes quite a few things around VFS.  What's in there:

   - the first of two really major architecture changes - death to open
     intents.

     The former is finally there; it was very long in making, but with
     Miklos getting through really hard and messy final push in
     fs/namei.c, we finally have it.  Unlike his variant, this one
     doesn't introduce struct opendata; what we have instead is
     ->atomic_open() taking preallocated struct file * and passing
     everything via its fields.

     Instead of returning struct file *, it returns -E...  on error, 0
     on success and 1 in "deal with it yourself" case (e.g.  symlink
     found on server, etc.).

     See comments before fs/namei.c:atomic_open().  That made a lot of
     goodies finally possible and quite a few are in that pile:
     ->lookup(), ->d_revalidate() and ->create() do not get struct
     nameidata * anymore; ->lookup() and ->d_revalidate() get lookup
     flags instead, ->create() gets "do we want it exclusive" flag.

     With the introduction of new helper (kern_path_locked()) we are rid
     of all struct nameidata instances outside of fs/namei.c; it's still
     visible in namei.h, but not for long.  Come the next cycle,
     declaration will move either to fs/internal.h or to fs/namei.c
     itself.  [me, miklos, hch]

   - The second major change: behaviour of final fput().  Now we have
     __fput() done without any locks held by caller *and* not from deep
     in call stack.

     That obviously lifts a lot of constraints on the locking in there.
     Moreover, it's legal now to call fput() from atomic contexts (which
     has immediately simplified life for aio.c).  We also don't need
     anti-recursion logics in __scm_destroy() anymore.

     There is a price, though - the damn thing has become partially
     asynchronous.  For fput() from normal process we are guaranteed
     that pending __fput() will be done before the caller returns to
     userland, exits or gets stopped for ptrace.

     For kernel threads and atomic contexts it's done via
     schedule_work(), so theoretically we might need a way to make sure
     it's finished; so far only one such place had been found, but there
     might be more.

     There's flush_delayed_fput() (do all pending __fput()) and there's
     __fput_sync() (fput() analog doing __fput() immediately).  I hope
     we won't need them often; see warnings in fs/file_table.c for
     details.  [me, based on task_work series from Oleg merged last
     cycle]

   - sync series from Jan

   - large part of "death to sync_supers()" work from Artem; the only
     bits missing here are exofs and ext4 ones.  As far as I understand,
     those are going via the exofs and ext4 trees resp.; once they are
     in, we can put ->write_super() to the rest, along with the thread
     calling it.

   - preparatory bits from unionmount series (from dhowells).

   - assorted cleanups and fixes all over the place, as usual.

  This is not the last pile for this cycle; there's at least jlayton's
  ESTALE work and fsfreeze series (the latter - in dire need of fixes,
  so I'm not sure it'll make the cut this cycle).  I'll probably throw
  symlink/hardlink restrictions stuff from Kees into the next pile, too.
  Plus there's a lot of misc patches I hadn't thrown into that one -
  it's large enough as it is..."

* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (127 commits)
  ext4: switch EXT4_IOC_RESIZE_FS to mnt_want_write_file()
  btrfs: switch btrfs_ioctl_balance() to mnt_want_write_file()
  switch dentry_open() to struct path, make it grab references itself
  spufs: shift dget/mntget towards dentry_open()
  zoran: don't bother with struct file * in zoran_map
  ecryptfs: don't reinvent the wheels, please - use struct completion
  don't expose I_NEW inodes via dentry->d_inode
  tidy up namei.c a bit
  unobfuscate follow_up() a bit
  ext3: pass custom EOF to generic_file_llseek_size()
  ext4: use core vfs llseek code for dir seeks
  vfs: allow custom EOF in generic_file_llseek code
  vfs: Avoid unnecessary WB_SYNC_NONE writeback during sys_sync and reorder sync passes
  vfs: Remove unnecessary flushing of block devices
  vfs: Make sys_sync writeout also block device inodes
  vfs: Create function for iterating over block devices
  vfs: Reorder operations during sys_sync
  quota: Move quota syncing to ->sync_fs method
  quota: Split dquot_quota_sync() to writeback and cache flushing part
  vfs: Move noop_backing_dev_info check from sync into writeback
  ...
This commit is contained in:
Linus Torvalds
2012-07-23 12:27:27 -07:00
210 changed files with 2613 additions and 2402 deletions

View File

@@ -595,7 +595,7 @@ void audit_trim_trees(void)
root_mnt = collect_mounts(&path);
path_put(&path);
if (!root_mnt)
if (IS_ERR(root_mnt))
goto skip_it;
spin_lock(&hash_lock);
@@ -669,8 +669,8 @@ int audit_add_tree_rule(struct audit_krule *rule)
goto Err;
mnt = collect_mounts(&path);
path_put(&path);
if (!mnt) {
err = -ENOMEM;
if (IS_ERR(mnt)) {
err = PTR_ERR(mnt);
goto Err;
}
@@ -719,8 +719,8 @@ int audit_tag_tree(char *old, char *new)
return err;
tagged = collect_mounts(&path2);
path_put(&path2);
if (!tagged)
return -ENOMEM;
if (IS_ERR(tagged))
return PTR_ERR(tagged);
err = kern_path(old, 0, &path1);
if (err) {

View File

@@ -355,34 +355,15 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
/* Get path information necessary for adding watches. */
static int audit_get_nd(struct audit_watch *watch, struct path *parent)
{
struct nameidata nd;
struct dentry *d;
int err;
err = kern_path_parent(watch->path, &nd);
if (err)
return err;
if (nd.last_type != LAST_NORM) {
path_put(&nd.path);
return -EINVAL;
}
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
if (IS_ERR(d)) {
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
}
mutex_unlock(&parent->dentry->d_inode->i_mutex);
if (d->d_inode) {
/* update watch filter fields */
watch->dev = d->d_inode->i_sb->s_dev;
watch->ino = d->d_inode->i_ino;
}
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
*parent = nd.path;
dput(d);
return 0;
}

View File

@@ -822,7 +822,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
*/
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
@@ -1587,7 +1587,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
opts.new_root = new_root;
/* Locate an existing or new sb for this hierarchy */
sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
if (IS_ERR(sb)) {
ret = PTR_ERR(sb);
cgroup_drop_root(opts.new_root);
@@ -2570,7 +2570,7 @@ static const struct inode_operations cgroup_dir_inode_operations = {
.rename = cgroup_rename,
};
static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);

View File

@@ -953,14 +953,11 @@ void do_exit(long code)
exit_signals(tsk); /* sets PF_EXITING */
/*
* tsk->flags are checked in the futex code to protect against
* an exiting task cleaning up the robust pi futexes, and in
* task_work_add() to avoid the race with exit_task_work().
* an exiting task cleaning up the robust pi futexes.
*/
smp_mb();
raw_spin_unlock_wait(&tsk->pi_lock);
exit_task_work(tsk);
if (unlikely(in_atomic()))
printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
current->comm, task_pid_nr(current),
@@ -995,6 +992,7 @@ void do_exit(long code)
exit_shm(tsk);
exit_files(tsk);
exit_fs(tsk);
exit_task_work(tsk);
check_stack_usage();
exit_thread();

View File

@@ -1420,7 +1420,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
*/
p->group_leader = p;
INIT_LIST_HEAD(&p->thread_group);
INIT_HLIST_HEAD(&p->task_works);
p->task_works = NULL;
/* Now that the task is set up, run cgroup callbacks if
* necessary. We need to run them before the task is visible

View File

@@ -781,7 +781,7 @@ static void wake_threads_waitq(struct irq_desc *desc)
wake_up(&desc->wait_for_threads);
}
static void irq_thread_dtor(struct task_work *unused)
static void irq_thread_dtor(struct callback_head *unused)
{
struct task_struct *tsk = current;
struct irq_desc *desc;
@@ -813,7 +813,7 @@ static void irq_thread_dtor(struct task_work *unused)
*/
static int irq_thread(void *data)
{
struct task_work on_exit_work;
struct callback_head on_exit_work;
static const struct sched_param param = {
.sched_priority = MAX_USER_RT_PRIO/2,
};
@@ -830,7 +830,7 @@ static int irq_thread(void *data)
sched_setscheduler(current, SCHED_FIFO, &param);
init_task_work(&on_exit_work, irq_thread_dtor, NULL);
init_task_work(&on_exit_work, irq_thread_dtor);
task_work_add(current, &on_exit_work, false);
while (!irq_wait_for_interrupt(action)) {

View File

@@ -1971,6 +1971,13 @@ static void ptrace_do_notify(int signr, int exit_code, int why)
void ptrace_notify(int exit_code)
{
BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
if (unlikely(current->task_works)) {
if (test_and_clear_ti_thread_flag(current_thread_info(),
TIF_NOTIFY_RESUME)) {
smp_mb__after_clear_bit();
task_work_run();
}
}
spin_lock_irq(&current->sighand->siglock);
ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED);
@@ -2191,6 +2198,14 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
struct signal_struct *signal = current->signal;
int signr;
if (unlikely(current->task_works)) {
if (test_and_clear_ti_thread_flag(current_thread_info(),
TIF_NOTIFY_RESUME)) {
smp_mb__after_clear_bit();
task_work_run();
}
}
if (unlikely(uprobe_deny_signal()))
return 0;

View File

@@ -3,82 +3,78 @@
#include <linux/tracehook.h>
int
task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
task_work_add(struct task_struct *task, struct callback_head *twork, bool notify)
{
struct callback_head *last, *first;
unsigned long flags;
int err = -ESRCH;
#ifndef TIF_NOTIFY_RESUME
if (notify)
return -ENOTSUPP;
#endif
/*
* We must not insert the new work if the task has already passed
* exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait()
* and check PF_EXITING under pi_lock.
* Not inserting the new work if the task has already passed
* exit_task_work() is the responisbility of callers.
*/
raw_spin_lock_irqsave(&task->pi_lock, flags);
if (likely(!(task->flags & PF_EXITING))) {
hlist_add_head(&twork->hlist, &task->task_works);
err = 0;
}
last = task->task_works;
first = last ? last->next : twork;
twork->next = first;
if (last)
last->next = twork;
task->task_works = twork;
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
/* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
if (likely(!err) && notify)
if (notify)
set_notify_resume(task);
return err;
return 0;
}
struct task_work *
struct callback_head *
task_work_cancel(struct task_struct *task, task_work_func_t func)
{
unsigned long flags;
struct task_work *twork;
struct hlist_node *pos;
struct callback_head *last, *res = NULL;
raw_spin_lock_irqsave(&task->pi_lock, flags);
hlist_for_each_entry(twork, pos, &task->task_works, hlist) {
if (twork->func == func) {
hlist_del(&twork->hlist);
goto found;
last = task->task_works;
if (last) {
struct callback_head *q = last, *p = q->next;
while (1) {
if (p->func == func) {
q->next = p->next;
if (p == last)
task->task_works = q == p ? NULL : q;
res = p;
break;
}
if (p == last)
break;
q = p;
p = q->next;
}
}
twork = NULL;
found:
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return twork;
return res;
}
void task_work_run(void)
{
struct task_struct *task = current;
struct hlist_head task_works;
struct hlist_node *pos;
struct callback_head *p, *q;
raw_spin_lock_irq(&task->pi_lock);
hlist_move_list(&task->task_works, &task_works);
raw_spin_unlock_irq(&task->pi_lock);
while (1) {
raw_spin_lock_irq(&task->pi_lock);
p = task->task_works;
task->task_works = NULL;
raw_spin_unlock_irq(&task->pi_lock);
if (unlikely(hlist_empty(&task_works)))
return;
/*
* We use hlist to save the space in task_struct, but we want fifo.
* Find the last entry, the list should be short, then process them
* in reverse order.
*/
for (pos = task_works.first; pos->next; pos = pos->next)
;
if (unlikely(!p))
return;
for (;;) {
struct hlist_node **pprev = pos->pprev;
struct task_work *twork = container_of(pos, struct task_work,
hlist);
twork->func(twork);
if (pprev == &task_works.first)
break;
pos = container_of(pprev, struct hlist_node, next);
q = p->next; /* head */
p->next = NULL; /* cut it */
while (q) {
p = q->next;
q->func(q);
q = p;
}
}
}