Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull the big VFS changes from Al Viro: "This one is *big* and changes quite a few things around VFS. What's in there: - the first of two really major architecture changes - death to open intents. The former is finally there; it was very long in the making, but with Miklos getting through the really hard and messy final push in fs/namei.c, we finally have it. Unlike his variant, this one doesn't introduce struct opendata; what we have instead is ->atomic_open() taking a preallocated struct file * and passing everything via its fields. Instead of returning struct file *, it returns -E... on error, 0 on success and 1 in the "deal with it yourself" case (e.g. symlink found on server, etc.). See comments before fs/namei.c:atomic_open(). That made a lot of goodies finally possible and quite a few are in that pile: ->lookup(), ->d_revalidate() and ->create() do not get struct nameidata * anymore; ->lookup() and ->d_revalidate() get lookup flags instead, ->create() gets a "do we want it exclusive" flag. With the introduction of a new helper (kern_path_locked()) we are rid of all struct nameidata instances outside of fs/namei.c; it's still visible in namei.h, but not for long. Come the next cycle, the declaration will move either to fs/internal.h or to fs/namei.c itself. [me, miklos, hch] - The second major change: behaviour of final fput(). Now we have __fput() done without any locks held by the caller *and* not from deep in the call stack. That obviously lifts a lot of constraints on the locking in there. Moreover, it's legal now to call fput() from atomic contexts (which has immediately simplified life for aio.c). We also don't need anti-recursion logic in __scm_destroy() anymore. There is a price, though - the damn thing has become partially asynchronous. For fput() from a normal process we are guaranteed that pending __fput() will be done before the caller returns to userland, exits or gets stopped for ptrace.
For kernel threads and atomic contexts it's done via schedule_work(), so theoretically we might need a way to make sure it's finished; so far only one such place has been found, but there might be more. There's flush_delayed_fput() (do all pending __fput()) and there's __fput_sync() (fput() analog doing __fput() immediately). I hope we won't need them often; see warnings in fs/file_table.c for details. [me, based on task_work series from Oleg merged last cycle] - sync series from Jan - large part of "death to sync_supers()" work from Artem; the only bits missing here are exofs and ext4 ones. As far as I understand, those are going via the exofs and ext4 trees resp.; once they are in, we can put ->write_super() to rest, along with the thread calling it. - preparatory bits from unionmount series (from dhowells). - assorted cleanups and fixes all over the place, as usual. This is not the last pile for this cycle; there's at least jlayton's ESTALE work and fsfreeze series (the latter - in dire need of fixes, so I'm not sure it'll make the cut this cycle). I'll probably throw symlink/hardlink restrictions stuff from Kees into the next pile, too. Plus there's a lot of misc patches I haven't thrown into that one - it's large enough as it is..."
* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (127 commits) ext4: switch EXT4_IOC_RESIZE_FS to mnt_want_write_file() btrfs: switch btrfs_ioctl_balance() to mnt_want_write_file() switch dentry_open() to struct path, make it grab references itself spufs: shift dget/mntget towards dentry_open() zoran: don't bother with struct file * in zoran_map ecryptfs: don't reinvent the wheels, please - use struct completion don't expose I_NEW inodes via dentry->d_inode tidy up namei.c a bit unobfuscate follow_up() a bit ext3: pass custom EOF to generic_file_llseek_size() ext4: use core vfs llseek code for dir seeks vfs: allow custom EOF in generic_file_llseek code vfs: Avoid unnecessary WB_SYNC_NONE writeback during sys_sync and reorder sync passes vfs: Remove unnecessary flushing of block devices vfs: Make sys_sync writeout also block device inodes vfs: Create function for iterating over block devices vfs: Reorder operations during sys_sync quota: Move quota syncing to ->sync_fs method quota: Split dquot_quota_sync() to writeback and cache flushing part vfs: Move noop_backing_dev_info check from sync into writeback ...
This commit is contained in:
@@ -324,74 +324,27 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
|
||||
|
||||
|
||||
/*
|
||||
* ext4_dir_llseek() based on generic_file_llseek() to handle both
|
||||
* non-htree and htree directories, where the "offset" is in terms
|
||||
* of the filename hash value instead of the byte offset.
|
||||
* ext4_dir_llseek() calls generic_file_llseek_size to handle htree
|
||||
* directories, where the "offset" is in terms of the filename hash
|
||||
* value instead of the byte offset.
|
||||
*
|
||||
* NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
|
||||
* will be invalid once the directory was converted into a dx directory
|
||||
* Because we may return a 64-bit hash that is well beyond offset limits,
|
||||
* we need to pass the max hash as the maximum allowable offset in
|
||||
* the htree directory case.
|
||||
*
|
||||
* For non-htree, ext4_llseek already chooses the proper max offset.
|
||||
*/
|
||||
loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
|
||||
{
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
loff_t ret = -EINVAL;
|
||||
int dx_dir = is_dx_dir(inode);
|
||||
loff_t htree_max = ext4_get_htree_eof(file);
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
|
||||
/* NOTE: relative offsets with dx directories might not work
|
||||
* as expected, as it is difficult to figure out the
|
||||
* correct offset between dx hashes */
|
||||
|
||||
switch (origin) {
|
||||
case SEEK_END:
|
||||
if (unlikely(offset > 0))
|
||||
goto out_err; /* not supported for directories */
|
||||
|
||||
/* so only negative offsets are left, does that have a
|
||||
* meaning for directories at all? */
|
||||
if (dx_dir)
|
||||
offset += ext4_get_htree_eof(file);
|
||||
else
|
||||
offset += inode->i_size;
|
||||
break;
|
||||
case SEEK_CUR:
|
||||
/*
|
||||
* Here we special-case the lseek(fd, 0, SEEK_CUR)
|
||||
* position-querying operation. Avoid rewriting the "same"
|
||||
* f_pos value back to the file because a concurrent read(),
|
||||
* write() or lseek() might have altered it
|
||||
*/
|
||||
if (offset == 0) {
|
||||
offset = file->f_pos;
|
||||
goto out_ok;
|
||||
}
|
||||
|
||||
offset += file->f_pos;
|
||||
break;
|
||||
}
|
||||
|
||||
if (unlikely(offset < 0))
|
||||
goto out_err;
|
||||
|
||||
if (!dx_dir) {
|
||||
if (offset > inode->i_sb->s_maxbytes)
|
||||
goto out_err;
|
||||
} else if (offset > ext4_get_htree_eof(file))
|
||||
goto out_err;
|
||||
|
||||
/* Special lock needed here? */
|
||||
if (offset != file->f_pos) {
|
||||
file->f_pos = offset;
|
||||
file->f_version = 0;
|
||||
}
|
||||
|
||||
out_ok:
|
||||
ret = offset;
|
||||
out_err:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
|
||||
return ret;
|
||||
if (likely(dx_dir))
|
||||
return generic_file_llseek_size(file, offset, origin,
|
||||
htree_max, htree_max);
|
||||
else
|
||||
return ext4_llseek(file, offset, origin);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -211,9 +211,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
|
||||
}
|
||||
|
||||
/*
|
||||
* ext4_llseek() copied from generic_file_llseek() to handle both
|
||||
* block-mapped and extent-mapped maxbytes values. This should
|
||||
* otherwise be identical with generic_file_llseek().
|
||||
* ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
|
||||
* by calling generic_file_llseek_size() with the appropriate maxbytes
|
||||
* value for each.
|
||||
*/
|
||||
loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
|
||||
{
|
||||
@@ -225,7 +225,8 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
|
||||
else
|
||||
maxbytes = inode->i_sb->s_maxbytes;
|
||||
|
||||
return generic_file_llseek_size(file, offset, origin, maxbytes);
|
||||
return generic_file_llseek_size(file, offset, origin,
|
||||
maxbytes, i_size_read(inode));
|
||||
}
|
||||
|
||||
const struct file_operations ext4_file_operations = {
|
||||
|
@@ -135,14 +135,7 @@ static int ext4_sync_parent(struct inode *inode)
|
||||
inode = igrab(inode);
|
||||
while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
|
||||
ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
|
||||
dentry = NULL;
|
||||
spin_lock(&inode->i_lock);
|
||||
if (!list_empty(&inode->i_dentry)) {
|
||||
dentry = list_first_entry(&inode->i_dentry,
|
||||
struct dentry, d_alias);
|
||||
dget(dentry);
|
||||
}
|
||||
spin_unlock(&inode->i_lock);
|
||||
dentry = d_find_any_alias(inode);
|
||||
if (!dentry)
|
||||
break;
|
||||
next = igrab(dentry->d_parent->d_inode);
|
||||
@@ -232,7 +225,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
|
||||
if (!journal) {
|
||||
ret = __sync_inode(inode, datasync);
|
||||
if (!ret && !list_empty(&inode->i_dentry))
|
||||
if (!ret && !hlist_empty(&inode->i_dentry))
|
||||
ret = ext4_sync_parent(inode);
|
||||
goto out;
|
||||
}
|
||||
|
@@ -389,7 +389,7 @@ group_add_out:
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mnt_want_write(filp->f_path.mnt);
|
||||
err = mnt_want_write_file(filp);
|
||||
if (err)
|
||||
goto resizefs_out;
|
||||
|
||||
@@ -401,7 +401,7 @@ group_add_out:
|
||||
}
|
||||
if (err == 0)
|
||||
err = err2;
|
||||
mnt_drop_write(filp->f_path.mnt);
|
||||
mnt_drop_write_file(filp);
|
||||
resizefs_out:
|
||||
ext4_resize_end(sb);
|
||||
return err;
|
||||
|
@@ -1312,7 +1312,7 @@ errout:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
|
||||
static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
|
||||
{
|
||||
struct inode *inode;
|
||||
struct ext4_dir_entry_2 *de;
|
||||
@@ -2072,8 +2072,8 @@ static int ext4_add_nondir(handle_t *handle,
|
||||
int err = ext4_add_entry(handle, dentry, inode);
|
||||
if (!err) {
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
d_instantiate(dentry, inode);
|
||||
unlock_new_inode(inode);
|
||||
d_instantiate(dentry, inode);
|
||||
return 0;
|
||||
}
|
||||
drop_nlink(inode);
|
||||
@@ -2091,7 +2091,7 @@ static int ext4_add_nondir(handle_t *handle,
|
||||
* with d_instantiate().
|
||||
*/
|
||||
static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
struct nameidata *nd)
|
||||
bool excl)
|
||||
{
|
||||
handle_t *handle;
|
||||
struct inode *inode;
|
||||
@@ -2249,8 +2249,8 @@ out_clear_inode:
|
||||
err = ext4_mark_inode_dirty(handle, dir);
|
||||
if (err)
|
||||
goto out_clear_inode;
|
||||
d_instantiate(dentry, inode);
|
||||
unlock_new_inode(inode);
|
||||
d_instantiate(dentry, inode);
|
||||
out_stop:
|
||||
brelse(dir_block);
|
||||
ext4_journal_stop(handle);
|
||||
|
@@ -4325,6 +4325,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
|
||||
|
||||
trace_ext4_sync_fs(sb, wait);
|
||||
flush_workqueue(sbi->dio_unwritten_wq);
|
||||
/*
|
||||
* Writeback quota in non-journalled quota case - journalled quota has
|
||||
* no dirty dquots
|
||||
*/
|
||||
dquot_writeback_dquots(sb, -1);
|
||||
if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
|
||||
if (wait)
|
||||
jbd2_log_wait_commit(sbi->s_journal, target);
|
||||
|
Reference in New Issue
Block a user