Merge branch 'for-chris-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/fdmanana/linux into for-linus-4.7

Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
Chris Mason
2016-05-17 14:43:19 -07:00
419 changed files with 4858 additions and 2268 deletions

View File

@@ -824,6 +824,7 @@ retry:
async_extent->ram_size - 1, 0);
goto out_free_reserve;
}
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
/*
* clear dirty, set writeback and unlock the pages.
@@ -861,6 +862,7 @@ retry:
}
return;
out_free_reserve:
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
out_free:
extent_clear_unlock_delalloc(inode, async_extent->start,
@@ -1038,6 +1040,8 @@ static noinline int cow_file_range(struct inode *inode,
goto out_drop_extent_cache;
}
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
if (disk_num_bytes < cur_alloc_size)
break;
@@ -1066,6 +1070,7 @@ out:
out_drop_extent_cache:
btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
out_reserve:
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
out_unlock:
extent_clear_unlock_delalloc(inode, start, end, locked_page,
@@ -1377,6 +1382,9 @@ next_slot:
*/
if (csum_exist_in_range(root, disk_bytenr, num_bytes))
goto out_check;
if (!btrfs_inc_nocow_writers(root->fs_info,
disk_bytenr))
goto out_check;
nocow = 1;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = found_key.offset +
@@ -1391,6 +1399,9 @@ out_check:
path->slots[0]++;
if (!nolock && nocow)
btrfs_end_write_no_snapshoting(root);
if (nocow)
btrfs_dec_nocow_writers(root->fs_info,
disk_bytenr);
goto next_slot;
}
if (!nocow) {
@@ -1411,6 +1422,9 @@ out_check:
if (ret) {
if (!nolock && nocow)
btrfs_end_write_no_snapshoting(root);
if (nocow)
btrfs_dec_nocow_writers(root->fs_info,
disk_bytenr);
goto error;
}
cow_start = (u64)-1;
@@ -1453,6 +1467,8 @@ out_check:
ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
num_bytes, num_bytes, type);
if (nocow)
btrfs_dec_nocow_writers(root->fs_info, disk_bytenr);
BUG_ON(ret); /* -ENOMEM */
if (root->root_key.objectid ==
@@ -7129,6 +7145,43 @@ out:
return em;
}
/*
 * Create the bookkeeping for a direct IO write: a pinned extent map (skipped
 * for BTRFS_ORDERED_NOCOW) followed by the matching DIO ordered extent.  Both
 * steps run with the inode's dio_sem held for reading.
 *
 * Returns the new extent map, NULL when @type is BTRFS_ORDERED_NOCOW and the
 * ordered extent was added successfully, or an ERR_PTR() on failure.  If
 * adding the ordered extent fails, an extent map created here is released and
 * the extent cache for [start, start + len - 1] is dropped again.
 */
static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
						  const u64 start,
						  const u64 len,
						  const u64 orig_start,
						  const u64 block_start,
						  const u64 block_len,
						  const u64 orig_block_len,
						  const u64 ram_bytes,
						  const int type)
{
	struct extent_map *map = NULL;
	int err;

	down_read(&BTRFS_I(inode)->dio_sem);

	if (type != BTRFS_ORDERED_NOCOW)
		map = create_pinned_em(inode, start, len, orig_start,
				       block_start, block_len, orig_block_len,
				       ram_bytes, type);

	/* IS_ERR(NULL) is false, so the NOCOW case falls through to here. */
	if (!IS_ERR(map)) {
		err = btrfs_add_ordered_extent_dio(inode, start, block_start,
						   len, block_len, type);
		if (err) {
			if (map) {
				/* Undo the extent map we just inserted. */
				free_extent_map(map);
				btrfs_drop_extent_cache(inode, start,
							start + len - 1, 0);
			}
			map = ERR_PTR(err);
		}
	}

	up_read(&BTRFS_I(inode)->dio_sem);

	return map;
}
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
u64 start, u64 len)
{
@@ -7144,41 +7197,13 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
if (ret)
return ERR_PTR(ret);
/*
* Create the ordered extent before the extent map. This is to avoid
* races with the fast fsync path that would lead to it logging file
* extent items that point to disk extents that were not yet written to.
* The fast fsync path collects ordered extents into a local list and
* then collects all the new extent maps, so we must create the ordered
* extent first and make sure the fast fsync path collects any new
* ordered extents after collecting new extent maps as well.
* The fsync path simply can not rely on inode_dio_wait() because it
* causes deadlock with AIO.
*/
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
ins.offset, ins.offset, 0);
if (ret) {
em = btrfs_create_dio_extent(inode, start, ins.offset, start,
ins.objectid, ins.offset, ins.offset,
ins.offset, 0);
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
if (IS_ERR(em))
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
return ERR_PTR(ret);
}
em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
ins.offset, ins.offset, ins.offset, 0);
if (IS_ERR(em)) {
struct btrfs_ordered_extent *oe;
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
oe = btrfs_lookup_ordered_extent(inode, start);
ASSERT(oe);
if (WARN_ON(!oe))
return em;
set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
btrfs_remove_ordered_extent(inode, oe);
/* Once for our lookup and once for the ordered extents tree. */
btrfs_put_ordered_extent(oe);
btrfs_put_ordered_extent(oe);
}
return em;
}
@@ -7650,24 +7675,21 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
block_start = em->block_start + (start - em->start);
if (can_nocow_extent(inode, start, &len, &orig_start,
&orig_block_len, &ram_bytes) == 1) {
&orig_block_len, &ram_bytes) == 1 &&
btrfs_inc_nocow_writers(root->fs_info, block_start)) {
struct extent_map *em2;
em2 = btrfs_create_dio_extent(inode, start, len,
orig_start, block_start,
len, orig_block_len,
ram_bytes, type);
btrfs_dec_nocow_writers(root->fs_info, block_start);
if (type == BTRFS_ORDERED_PREALLOC) {
free_extent_map(em);
em = create_pinned_em(inode, start, len,
orig_start,
block_start, len,
orig_block_len,
ram_bytes, type);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto unlock_err;
}
em = em2;
}
ret = btrfs_add_ordered_extent_dio(inode, start,
block_start, len, len, type);
if (ret) {
free_extent_map(em);
if (em2 && IS_ERR(em2)) {
ret = PTR_ERR(em2);
goto unlock_err;
}
goto unlock;
@@ -9230,6 +9252,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&ei->delalloc_inodes);
INIT_LIST_HEAD(&ei->delayed_iput);
RB_CLEAR_NODE(&ei->rb_node);
init_rwsem(&ei->dio_sem);
return inode;
}
@@ -9387,18 +9410,290 @@ static int btrfs_getattr(struct vfsmount *mnt,
return 0;
}
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
static int btrfs_rename_exchange(struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,
struct dentry *new_dentry)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(old_dir)->root;
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
struct inode *new_inode = new_dentry->d_inode;
struct inode *old_inode = old_dentry->d_inode;
struct timespec ctime = CURRENT_TIME;
struct dentry *parent;
u64 old_ino = btrfs_ino(old_inode);
u64 new_ino = btrfs_ino(new_inode);
u64 old_idx = 0;
u64 new_idx = 0;
u64 root_objectid;
int ret;
bool root_log_pinned = false;
bool dest_log_pinned = false;
/* we only allow rename subvolume link between subvolumes */
if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
return -EXDEV;
/* close the race window with snapshot create/destroy ioctl */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
down_read(&root->fs_info->subvol_sem);
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
down_read(&dest->fs_info->subvol_sem);
/*
* We want to reserve the absolute worst case amount of items. So if
* both inodes are subvols and we need to unlink them then that would
* require 4 item modifications, but if they are both normal inodes it
* would require 5 item modifications, so we'll assume their normal
* inodes. So 5 * 2 is 10, plus 2 for the new links, so 12 total items
* should cover the worst case number of items we'll modify.
*/
trans = btrfs_start_transaction(root, 12);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_notrans;
}
/*
* We need to find a free sequence number both in the source and
* in the destination directory for the exchange.
*/
ret = btrfs_set_inode_index(new_dir, &old_idx);
if (ret)
goto out_fail;
ret = btrfs_set_inode_index(old_dir, &new_idx);
if (ret)
goto out_fail;
BTRFS_I(old_inode)->dir_index = 0ULL;
BTRFS_I(new_inode)->dir_index = 0ULL;
/* Reference for the source. */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(root->fs_info, trans);
} else {
btrfs_pin_log_trans(root);
root_log_pinned = true;
ret = btrfs_insert_inode_ref(trans, dest,
new_dentry->d_name.name,
new_dentry->d_name.len,
old_ino,
btrfs_ino(new_dir), old_idx);
if (ret)
goto out_fail;
}
/* And now for the dest. */
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(dest->fs_info, trans);
} else {
btrfs_pin_log_trans(dest);
dest_log_pinned = true;
ret = btrfs_insert_inode_ref(trans, root,
old_dentry->d_name.name,
old_dentry->d_name.len,
new_ino,
btrfs_ino(old_dir), new_idx);
if (ret)
goto out_fail;
}
/* Update inode version and ctime/mtime. */
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
inode_inc_iversion(old_inode);
inode_inc_iversion(new_inode);
old_dir->i_ctime = old_dir->i_mtime = ctime;
new_dir->i_ctime = new_dir->i_mtime = ctime;
old_inode->i_ctime = ctime;
new_inode->i_ctime = ctime;
if (old_dentry->d_parent != new_dentry->d_parent) {
btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
}
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
ret = btrfs_unlink_subvol(trans, root, old_dir,
root_objectid,
old_dentry->d_name.name,
old_dentry->d_name.len);
} else { /* src is an inode */
ret = __btrfs_unlink_inode(trans, root, old_dir,
old_dentry->d_inode,
old_dentry->d_name.name,
old_dentry->d_name.len);
if (!ret)
ret = btrfs_update_inode(trans, root, old_inode);
}
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_fail;
}
/* dest is a subvolume */
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
ret = btrfs_unlink_subvol(trans, dest, new_dir,
root_objectid,
new_dentry->d_name.name,
new_dentry->d_name.len);
} else { /* dest is an inode */
ret = __btrfs_unlink_inode(trans, dest, new_dir,
new_dentry->d_inode,
new_dentry->d_name.name,
new_dentry->d_name.len);
if (!ret)
ret = btrfs_update_inode(trans, dest, new_inode);
}
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_fail;
}
ret = btrfs_add_link(trans, new_dir, old_inode,
new_dentry->d_name.name,
new_dentry->d_name.len, 0, old_idx);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_fail;
}
ret = btrfs_add_link(trans, old_dir, new_inode,
old_dentry->d_name.name,
old_dentry->d_name.len, 0, new_idx);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_fail;
}
if (old_inode->i_nlink == 1)
BTRFS_I(old_inode)->dir_index = old_idx;
if (new_inode->i_nlink == 1)
BTRFS_I(new_inode)->dir_index = new_idx;
if (root_log_pinned) {
parent = new_dentry->d_parent;
btrfs_log_new_name(trans, old_inode, old_dir, parent);
btrfs_end_log_trans(root);
root_log_pinned = false;
}
if (dest_log_pinned) {
parent = old_dentry->d_parent;
btrfs_log_new_name(trans, new_inode, new_dir, parent);
btrfs_end_log_trans(dest);
dest_log_pinned = false;
}
out_fail:
/*
* If we have pinned a log and an error happened, we unpin tasks
* trying to sync the log and force them to fallback to a transaction
* commit if the log currently contains any of the inodes involved in
* this rename operation (to ensure we do not persist a log with an
* inconsistent state for any of these inodes or leading to any
* inconsistencies when replayed). If the transaction was aborted, the
* abortion reason is propagated to userspace when attempting to commit
* the transaction. If the log does not contain any of these inodes, we
* allow the tasks to sync it.
*/
if (ret && (root_log_pinned || dest_log_pinned)) {
if (btrfs_inode_in_log(old_dir, root->fs_info->generation) ||
btrfs_inode_in_log(new_dir, root->fs_info->generation) ||
btrfs_inode_in_log(old_inode, root->fs_info->generation) ||
(new_inode &&
btrfs_inode_in_log(new_inode, root->fs_info->generation)))
btrfs_set_log_full_commit(root->fs_info, trans);
if (root_log_pinned) {
btrfs_end_log_trans(root);
root_log_pinned = false;
}
if (dest_log_pinned) {
btrfs_end_log_trans(dest);
dest_log_pinned = false;
}
}
ret = btrfs_end_transaction(trans, root);
out_notrans:
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&dest->fs_info->subvol_sem);
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&root->fs_info->subvol_sem);
return ret;
}
/*
 * Create the whiteout entry for a RENAME_WHITEOUT rename: a new character
 * device inode (mode S_IFCHR | WHITEOUT_MODE, device WHITEOUT_DEV) linked
 * into @dir under @dentry's name, inside the caller's transaction.
 *
 * Returns 0 on success or a negative errno.  Once the inode exists it is
 * always unlocked and released before returning; on failure its link count
 * is dropped first.
 */
static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct inode *dir,
				     struct dentry *dentry)
{
	struct inode *whiteout;
	u64 ino;
	u64 dir_index;
	int err;

	err = btrfs_find_free_ino(root, &ino);
	if (err)
		return err;

	whiteout = btrfs_new_inode(trans, root, dir,
				   dentry->d_name.name,
				   dentry->d_name.len,
				   btrfs_ino(dir),
				   ino,
				   S_IFCHR | WHITEOUT_MODE,
				   &dir_index);
	if (IS_ERR(whiteout)) {
		err = PTR_ERR(whiteout);
		return err;
	}

	whiteout->i_op = &btrfs_special_inode_operations;
	init_special_inode(whiteout, whiteout->i_mode,
			   WHITEOUT_DEV);

	/* Each step runs only if every previous one succeeded. */
	err = btrfs_init_inode_security(trans, whiteout, dir,
					&dentry->d_name);
	if (!err)
		err = btrfs_add_nondir(trans, dir, dentry,
				       whiteout, 0, dir_index);
	if (!err)
		err = btrfs_update_inode(trans, root, whiteout);

	/* Common exit for success and failure after inode creation. */
	unlock_new_inode(whiteout);
	if (err)
		inode_dec_link_count(whiteout);
	iput(whiteout);

	return err;
}
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
struct btrfs_trans_handle *trans;
unsigned int trans_num_items;
struct btrfs_root *root = BTRFS_I(old_dir)->root;
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
struct inode *new_inode = d_inode(new_dentry);
struct inode *old_inode = d_inode(old_dentry);
u64 index = 0;
u64 root_objectid;
int ret;
u64 old_ino = btrfs_ino(old_inode);
bool log_pinned = false;
if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
@@ -9449,15 +9744,21 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
* We want to reserve the absolute worst case amount of items. So if
* both inodes are subvols and we need to unlink them then that would
* require 4 item modifications, but if they are both normal inodes it
* would require 5 item modifications, so we'll assume their normal
* would require 5 item modifications, so we'll assume they are normal
* inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
* should cover the worst case number of items we'll modify.
* If our rename has the whiteout flag, we need 5 more units for the
* new inode (1 inode item, 1 inode ref, 2 dir items and 1 xattr item
* when selinux is enabled).
*/
trans = btrfs_start_transaction(root, 11);
trans_num_items = 11;
if (flags & RENAME_WHITEOUT)
trans_num_items += 5;
trans = btrfs_start_transaction(root, trans_num_items);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_notrans;
}
ret = PTR_ERR(trans);
goto out_notrans;
}
if (dest != root)
btrfs_record_root_in_trans(trans, dest);
@@ -9471,6 +9772,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(root->fs_info, trans);
} else {
btrfs_pin_log_trans(root);
log_pinned = true;
ret = btrfs_insert_inode_ref(trans, dest,
new_dentry->d_name.name,
new_dentry->d_name.len,
@@ -9478,14 +9781,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
btrfs_ino(new_dir), index);
if (ret)
goto out_fail;
/*
* this is an ugly little race, but the rename is required
* to make sure that if we crash, the inode is either at the
* old name or the new one. pinning the log transaction lets
* us make sure we don't allow a log commit to come in after
* we unlink the name but before we add the new name back in.
*/
btrfs_pin_log_trans(root);
}
inode_inc_iversion(old_dir);
@@ -9552,12 +9847,46 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_inode->i_nlink == 1)
BTRFS_I(old_inode)->dir_index = index;
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
if (log_pinned) {
struct dentry *parent = new_dentry->d_parent;
btrfs_log_new_name(trans, old_inode, old_dir, parent);
btrfs_end_log_trans(root);
log_pinned = false;
}
if (flags & RENAME_WHITEOUT) {
ret = btrfs_whiteout_for_rename(trans, root, old_dir,
old_dentry);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_fail;
}
}
out_fail:
/*
* If we have pinned the log and an error happened, we unpin tasks
* trying to sync the log and force them to fallback to a transaction
* commit if the log currently contains any of the inodes involved in
* this rename operation (to ensure we do not persist a log with an
* inconsistent state for any of these inodes or leading to any
* inconsistencies when replayed). If the transaction was aborted, the
* abortion reason is propagated to userspace when attempting to commit
* the transaction. If the log does not contain any of these inodes, we
* allow the tasks to sync it.
*/
if (ret && log_pinned) {
if (btrfs_inode_in_log(old_dir, root->fs_info->generation) ||
btrfs_inode_in_log(new_dir, root->fs_info->generation) ||
btrfs_inode_in_log(old_inode, root->fs_info->generation) ||
(new_inode &&
btrfs_inode_in_log(new_inode, root->fs_info->generation)))
btrfs_set_log_full_commit(root->fs_info, trans);
btrfs_end_log_trans(root);
log_pinned = false;
}
btrfs_end_transaction(trans, root);
out_notrans:
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
@@ -9570,10 +9899,14 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
if (flags & ~RENAME_NOREPLACE)
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
if (flags & RENAME_EXCHANGE)
return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
new_dentry);
return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
static void btrfs_run_delalloc_work(struct btrfs_work *work)
@@ -9942,6 +10275,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
btrfs_end_transaction(trans, root);
break;
}
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
last_alloc = ins.offset;
ret = insert_reserved_file_extent(trans, inode,