Merge branch 'for-chris-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/fdmanana/linux into for-linus-4.7
Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
@@ -196,6 +196,16 @@ struct btrfs_inode {
|
||||
struct list_head delayed_iput;
|
||||
long delayed_iput_count;
|
||||
|
||||
/*
|
||||
* To avoid races between lockless (i_mutex not held) direct IO writes
|
||||
* and concurrent fsync requests. Direct IO writes must acquire read
|
||||
* access on this semaphore for creating an extent map and its
|
||||
* corresponding ordered extent. The fast fsync path must acquire write
|
||||
* access on this semaphore before it collects ordered extents and
|
||||
* extent maps.
|
||||
*/
|
||||
struct rw_semaphore dio_sem;
|
||||
|
||||
struct inode vfs_inode;
|
||||
};
|
||||
|
||||
|
@@ -618,6 +618,27 @@ struct btrfs_block_group_cache {
|
||||
|
||||
struct btrfs_io_ctl io_ctl;
|
||||
|
||||
/*
|
||||
* Incremented when doing extent allocations and holding a read lock
|
||||
* on the space_info's groups_sem semaphore.
|
||||
* Decremented when an ordered extent that represents an IO against this
|
||||
* block group's range is created (after it's added to its inode's
|
||||
* root's list of ordered extents) or immediately after the allocation
|
||||
* if it's a metadata extent or fallocate extent (for these cases we
|
||||
* don't create ordered extents).
|
||||
*/
|
||||
atomic_t reservations;
|
||||
|
||||
/*
|
||||
* Incremented while holding the spinlock *lock* by a task checking if
|
||||
* it can perform a nocow write (incremented if the value for the *ro*
|
||||
* field is 0). Decremented by such tasks once they create an ordered
|
||||
* extent or before that if some error happens before reaching that step.
|
||||
* This is to prevent races between block group relocation and nocow
|
||||
* writes through direct IO.
|
||||
*/
|
||||
atomic_t nocow_writers;
|
||||
|
||||
/* Lock for free space tree operations. */
|
||||
struct mutex free_space_lock;
|
||||
|
||||
@@ -2487,6 +2508,12 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
|
||||
const u64 start);
|
||||
void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg);
|
||||
bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
|
||||
void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
|
||||
void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg);
|
||||
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
|
||||
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, unsigned long count);
|
||||
|
@@ -385,7 +385,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, char *tgtdev_name,
|
||||
if (ret)
|
||||
btrfs_err(fs_info, "kobj add dev failed %d\n", ret);
|
||||
|
||||
btrfs_wait_ordered_roots(fs_info, -1);
|
||||
btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
|
||||
|
||||
/* force writing the updated state information to disk */
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
@@ -504,7 +504,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
|
||||
return ret;
|
||||
}
|
||||
btrfs_wait_ordered_roots(root->fs_info, -1);
|
||||
btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
|
||||
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
if (IS_ERR(trans)) {
|
||||
|
@@ -3824,6 +3824,59 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
|
||||
return readonly;
|
||||
}
|
||||
|
||||
/*
 * Try to register the caller as a nocow writer against the block group that
 * contains @bytenr.
 *
 * Returns true when the block group exists and is not read-only; in that case
 * its nocow_writers counter has been incremented and the block group reference
 * obtained by the lookup is kept (it is dropped later by
 * btrfs_dec_nocow_writers()).
 *
 * Returns false when no block group covers @bytenr or the block group is
 * read-only; no counter is touched and no reference is kept.
 */
bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;
	bool read_only;

	cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!cache)
		return false;

	/* The ro check and the increment must happen under the same lock. */
	spin_lock(&cache->lock);
	read_only = cache->ro;
	if (!read_only)
		atomic_inc(&cache->nocow_writers);
	spin_unlock(&cache->lock);

	if (read_only) {
		/* Not registered as a writer, so give the lookup reference back. */
		btrfs_put_block_group(cache);
		return false;
	}

	/* Success: the lookup reference is intentionally not dropped here. */
	return true;
}
|
||||
|
||||
/*
 * Undo a successful btrfs_inc_nocow_writers() for the block group containing
 * @bytenr: decrement the nocow_writers counter and, if it reached zero, wake
 * up anyone waiting on it.
 */
void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache =
		btrfs_lookup_block_group(fs_info, bytenr);

	ASSERT(cache);

	if (atomic_dec_and_test(&cache->nocow_writers))
		wake_up_atomic_t(&cache->nocow_writers);

	/* Drop the reference taken by the lookup just above ... */
	btrfs_put_block_group(cache);
	/* ... and the one kept by the earlier btrfs_inc_nocow_writers() call. */
	btrfs_put_block_group(cache);
}
|
||||
|
||||
static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a)
|
||||
{
|
||||
schedule();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
|
||||
{
|
||||
wait_on_atomic_t(&bg->nocow_writers,
|
||||
btrfs_wait_nocow_writers_atomic_t,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
static const char *alloc_name(u64 flags)
|
||||
{
|
||||
switch (flags) {
|
||||
@@ -4141,7 +4194,7 @@ commit_trans:
|
||||
|
||||
if (need_commit > 0) {
|
||||
btrfs_start_delalloc_roots(fs_info, 0, -1);
|
||||
btrfs_wait_ordered_roots(fs_info, -1);
|
||||
btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
|
||||
}
|
||||
|
||||
trans = btrfs_join_transaction(root);
|
||||
@@ -4583,7 +4636,8 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
|
||||
*/
|
||||
btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
|
||||
if (!current->journal_info)
|
||||
btrfs_wait_ordered_roots(root->fs_info, nr_items);
|
||||
btrfs_wait_ordered_roots(root->fs_info, nr_items,
|
||||
0, (u64)-1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4632,7 +4686,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
|
||||
if (trans)
|
||||
return;
|
||||
if (wait_ordered)
|
||||
btrfs_wait_ordered_roots(root->fs_info, items);
|
||||
btrfs_wait_ordered_roots(root->fs_info, items,
|
||||
0, (u64)-1);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -4671,7 +4726,8 @@ skip_async:
|
||||
|
||||
loops++;
|
||||
if (wait_ordered && !trans) {
|
||||
btrfs_wait_ordered_roots(root->fs_info, items);
|
||||
btrfs_wait_ordered_roots(root->fs_info, items,
|
||||
0, (u64)-1);
|
||||
} else {
|
||||
time_left = schedule_timeout_killable(1);
|
||||
if (time_left)
|
||||
@@ -6172,6 +6228,57 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
|
||||
{
|
||||
atomic_inc(&bg->reservations);
|
||||
}
|
||||
|
||||
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
|
||||
const u64 start)
|
||||
{
|
||||
struct btrfs_block_group_cache *bg;
|
||||
|
||||
bg = btrfs_lookup_block_group(fs_info, start);
|
||||
ASSERT(bg);
|
||||
if (atomic_dec_and_test(&bg->reservations))
|
||||
wake_up_atomic_t(&bg->reservations);
|
||||
btrfs_put_block_group(bg);
|
||||
}
|
||||
|
||||
static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a)
|
||||
{
|
||||
schedule();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
|
||||
{
|
||||
struct btrfs_space_info *space_info = bg->space_info;
|
||||
|
||||
ASSERT(bg->ro);
|
||||
|
||||
if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Our block group is read only but before we set it to read only,
|
||||
* some task might have had allocated an extent from it already, but it
|
||||
* has not yet created a respective ordered extent (and added it to a
|
||||
* root's list of ordered extents).
|
||||
* Therefore wait for any task currently allocating extents, since the
|
||||
* block group's reservations counter is incremented while a read lock
|
||||
* on the groups' semaphore is held and decremented after releasing
|
||||
* the read access on that semaphore and creating the ordered extent.
|
||||
*/
|
||||
down_write(&space_info->groups_sem);
|
||||
up_write(&space_info->groups_sem);
|
||||
|
||||
wait_on_atomic_t(&bg->reservations,
|
||||
btrfs_wait_bg_reservations_atomic_t,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_update_reserved_bytes - update the block_group and space info counters
|
||||
* @cache: The cache we are manipulating
|
||||
@@ -7430,6 +7537,7 @@ checks:
|
||||
btrfs_add_free_space(block_group, offset, num_bytes);
|
||||
goto loop;
|
||||
}
|
||||
btrfs_inc_block_group_reservations(block_group);
|
||||
|
||||
/* we are all good, lets return */
|
||||
ins->objectid = search_start;
|
||||
@@ -7611,8 +7719,10 @@ again:
|
||||
WARN_ON(num_bytes < root->sectorsize);
|
||||
ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
|
||||
flags, delalloc);
|
||||
|
||||
if (ret == -ENOSPC) {
|
||||
if (!ret && !is_data) {
|
||||
btrfs_dec_block_group_reservations(root->fs_info,
|
||||
ins->objectid);
|
||||
} else if (ret == -ENOSPC) {
|
||||
if (!final_tried && ins->offset) {
|
||||
num_bytes = min(num_bytes >> 1, ins->offset);
|
||||
num_bytes = round_down(num_bytes, root->sectorsize);
|
||||
|
466
fs/btrfs/inode.c
466
fs/btrfs/inode.c
@@ -824,6 +824,7 @@ retry:
|
||||
async_extent->ram_size - 1, 0);
|
||||
goto out_free_reserve;
|
||||
}
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
|
||||
/*
|
||||
* clear dirty, set writeback and unlock the pages.
|
||||
@@ -861,6 +862,7 @@ retry:
|
||||
}
|
||||
return;
|
||||
out_free_reserve:
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
|
||||
out_free:
|
||||
extent_clear_unlock_delalloc(inode, async_extent->start,
|
||||
@@ -1038,6 +1040,8 @@ static noinline int cow_file_range(struct inode *inode,
|
||||
goto out_drop_extent_cache;
|
||||
}
|
||||
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
|
||||
if (disk_num_bytes < cur_alloc_size)
|
||||
break;
|
||||
|
||||
@@ -1066,6 +1070,7 @@ out:
|
||||
out_drop_extent_cache:
|
||||
btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
|
||||
out_reserve:
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
|
||||
out_unlock:
|
||||
extent_clear_unlock_delalloc(inode, start, end, locked_page,
|
||||
@@ -1377,6 +1382,9 @@ next_slot:
|
||||
*/
|
||||
if (csum_exist_in_range(root, disk_bytenr, num_bytes))
|
||||
goto out_check;
|
||||
if (!btrfs_inc_nocow_writers(root->fs_info,
|
||||
disk_bytenr))
|
||||
goto out_check;
|
||||
nocow = 1;
|
||||
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
extent_end = found_key.offset +
|
||||
@@ -1391,6 +1399,9 @@ out_check:
|
||||
path->slots[0]++;
|
||||
if (!nolock && nocow)
|
||||
btrfs_end_write_no_snapshoting(root);
|
||||
if (nocow)
|
||||
btrfs_dec_nocow_writers(root->fs_info,
|
||||
disk_bytenr);
|
||||
goto next_slot;
|
||||
}
|
||||
if (!nocow) {
|
||||
@@ -1411,6 +1422,9 @@ out_check:
|
||||
if (ret) {
|
||||
if (!nolock && nocow)
|
||||
btrfs_end_write_no_snapshoting(root);
|
||||
if (nocow)
|
||||
btrfs_dec_nocow_writers(root->fs_info,
|
||||
disk_bytenr);
|
||||
goto error;
|
||||
}
|
||||
cow_start = (u64)-1;
|
||||
@@ -1453,6 +1467,8 @@ out_check:
|
||||
|
||||
ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
|
||||
num_bytes, num_bytes, type);
|
||||
if (nocow)
|
||||
btrfs_dec_nocow_writers(root->fs_info, disk_bytenr);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
|
||||
if (root->root_key.objectid ==
|
||||
@@ -7129,6 +7145,43 @@ out:
|
||||
return em;
|
||||
}
|
||||
|
||||
static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
|
||||
const u64 start,
|
||||
const u64 len,
|
||||
const u64 orig_start,
|
||||
const u64 block_start,
|
||||
const u64 block_len,
|
||||
const u64 orig_block_len,
|
||||
const u64 ram_bytes,
|
||||
const int type)
|
||||
{
|
||||
struct extent_map *em = NULL;
|
||||
int ret;
|
||||
|
||||
down_read(&BTRFS_I(inode)->dio_sem);
|
||||
if (type != BTRFS_ORDERED_NOCOW) {
|
||||
em = create_pinned_em(inode, start, len, orig_start,
|
||||
block_start, block_len, orig_block_len,
|
||||
ram_bytes, type);
|
||||
if (IS_ERR(em))
|
||||
goto out;
|
||||
}
|
||||
ret = btrfs_add_ordered_extent_dio(inode, start, block_start,
|
||||
len, block_len, type);
|
||||
if (ret) {
|
||||
if (em) {
|
||||
free_extent_map(em);
|
||||
btrfs_drop_extent_cache(inode, start,
|
||||
start + len - 1, 0);
|
||||
}
|
||||
em = ERR_PTR(ret);
|
||||
}
|
||||
out:
|
||||
up_read(&BTRFS_I(inode)->dio_sem);
|
||||
|
||||
return em;
|
||||
}
|
||||
|
||||
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
@@ -7144,41 +7197,13 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
/*
|
||||
* Create the ordered extent before the extent map. This is to avoid
|
||||
* races with the fast fsync path that would lead to it logging file
|
||||
* extent items that point to disk extents that were not yet written to.
|
||||
* The fast fsync path collects ordered extents into a local list and
|
||||
* then collects all the new extent maps, so we must create the ordered
|
||||
* extent first and make sure the fast fsync path collects any new
|
||||
* ordered extents after collecting new extent maps as well.
|
||||
* The fsync path simply can not rely on inode_dio_wait() because it
|
||||
* causes deadlock with AIO.
|
||||
*/
|
||||
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
|
||||
ins.offset, ins.offset, 0);
|
||||
if (ret) {
|
||||
em = btrfs_create_dio_extent(inode, start, ins.offset, start,
|
||||
ins.objectid, ins.offset, ins.offset,
|
||||
ins.offset, 0);
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
if (IS_ERR(em))
|
||||
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
|
||||
ins.offset, ins.offset, ins.offset, 0);
|
||||
if (IS_ERR(em)) {
|
||||
struct btrfs_ordered_extent *oe;
|
||||
|
||||
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
|
||||
oe = btrfs_lookup_ordered_extent(inode, start);
|
||||
ASSERT(oe);
|
||||
if (WARN_ON(!oe))
|
||||
return em;
|
||||
set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
|
||||
set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
|
||||
btrfs_remove_ordered_extent(inode, oe);
|
||||
/* Once for our lookup and once for the ordered extents tree. */
|
||||
btrfs_put_ordered_extent(oe);
|
||||
btrfs_put_ordered_extent(oe);
|
||||
}
|
||||
return em;
|
||||
}
|
||||
|
||||
@@ -7650,24 +7675,21 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
||||
block_start = em->block_start + (start - em->start);
|
||||
|
||||
if (can_nocow_extent(inode, start, &len, &orig_start,
|
||||
&orig_block_len, &ram_bytes) == 1) {
|
||||
&orig_block_len, &ram_bytes) == 1 &&
|
||||
btrfs_inc_nocow_writers(root->fs_info, block_start)) {
|
||||
struct extent_map *em2;
|
||||
|
||||
em2 = btrfs_create_dio_extent(inode, start, len,
|
||||
orig_start, block_start,
|
||||
len, orig_block_len,
|
||||
ram_bytes, type);
|
||||
btrfs_dec_nocow_writers(root->fs_info, block_start);
|
||||
if (type == BTRFS_ORDERED_PREALLOC) {
|
||||
free_extent_map(em);
|
||||
em = create_pinned_em(inode, start, len,
|
||||
orig_start,
|
||||
block_start, len,
|
||||
orig_block_len,
|
||||
ram_bytes, type);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
goto unlock_err;
|
||||
}
|
||||
em = em2;
|
||||
}
|
||||
|
||||
ret = btrfs_add_ordered_extent_dio(inode, start,
|
||||
block_start, len, len, type);
|
||||
if (ret) {
|
||||
free_extent_map(em);
|
||||
if (em2 && IS_ERR(em2)) {
|
||||
ret = PTR_ERR(em2);
|
||||
goto unlock_err;
|
||||
}
|
||||
goto unlock;
|
||||
@@ -9230,6 +9252,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
|
||||
INIT_LIST_HEAD(&ei->delalloc_inodes);
|
||||
INIT_LIST_HEAD(&ei->delayed_iput);
|
||||
RB_CLEAR_NODE(&ei->rb_node);
|
||||
init_rwsem(&ei->dio_sem);
|
||||
|
||||
return inode;
|
||||
}
|
||||
@@ -9387,18 +9410,290 @@ static int btrfs_getattr(struct vfsmount *mnt,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
struct inode *new_dir, struct dentry *new_dentry)
|
||||
static int btrfs_rename_exchange(struct inode *old_dir,
|
||||
struct dentry *old_dentry,
|
||||
struct inode *new_dir,
|
||||
struct dentry *new_dentry)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_root *root = BTRFS_I(old_dir)->root;
|
||||
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
|
||||
struct inode *new_inode = new_dentry->d_inode;
|
||||
struct inode *old_inode = old_dentry->d_inode;
|
||||
struct timespec ctime = CURRENT_TIME;
|
||||
struct dentry *parent;
|
||||
u64 old_ino = btrfs_ino(old_inode);
|
||||
u64 new_ino = btrfs_ino(new_inode);
|
||||
u64 old_idx = 0;
|
||||
u64 new_idx = 0;
|
||||
u64 root_objectid;
|
||||
int ret;
|
||||
bool root_log_pinned = false;
|
||||
bool dest_log_pinned = false;
|
||||
|
||||
/* we only allow rename subvolume link between subvolumes */
|
||||
if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
|
||||
return -EXDEV;
|
||||
|
||||
/* close the race window with snapshot create/destroy ioctl */
|
||||
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
|
||||
down_read(&root->fs_info->subvol_sem);
|
||||
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
|
||||
down_read(&dest->fs_info->subvol_sem);
|
||||
|
||||
/*
|
||||
* We want to reserve the absolute worst case amount of items. So if
|
||||
* both inodes are subvols and we need to unlink them then that would
|
||||
* require 4 item modifications, but if they are both normal inodes it
|
||||
* would require 5 item modifications, so we'll assume their normal
|
||||
* inodes. So 5 * 2 is 10, plus 2 for the new links, so 12 total items
|
||||
* should cover the worst case number of items we'll modify.
|
||||
*/
|
||||
trans = btrfs_start_transaction(root, 12);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_notrans;
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to find a free sequence number both in the source and
|
||||
* in the destination directory for the exchange.
|
||||
*/
|
||||
ret = btrfs_set_inode_index(new_dir, &old_idx);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
ret = btrfs_set_inode_index(old_dir, &new_idx);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
|
||||
BTRFS_I(old_inode)->dir_index = 0ULL;
|
||||
BTRFS_I(new_inode)->dir_index = 0ULL;
|
||||
|
||||
/* Reference for the source. */
|
||||
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
/* force full log commit if subvolume involved. */
|
||||
btrfs_set_log_full_commit(root->fs_info, trans);
|
||||
} else {
|
||||
btrfs_pin_log_trans(root);
|
||||
root_log_pinned = true;
|
||||
ret = btrfs_insert_inode_ref(trans, dest,
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len,
|
||||
old_ino,
|
||||
btrfs_ino(new_dir), old_idx);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
/* And now for the dest. */
|
||||
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
/* force full log commit if subvolume involved. */
|
||||
btrfs_set_log_full_commit(dest->fs_info, trans);
|
||||
} else {
|
||||
btrfs_pin_log_trans(dest);
|
||||
dest_log_pinned = true;
|
||||
ret = btrfs_insert_inode_ref(trans, root,
|
||||
old_dentry->d_name.name,
|
||||
old_dentry->d_name.len,
|
||||
new_ino,
|
||||
btrfs_ino(old_dir), new_idx);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
/* Update inode version and ctime/mtime. */
|
||||
inode_inc_iversion(old_dir);
|
||||
inode_inc_iversion(new_dir);
|
||||
inode_inc_iversion(old_inode);
|
||||
inode_inc_iversion(new_inode);
|
||||
old_dir->i_ctime = old_dir->i_mtime = ctime;
|
||||
new_dir->i_ctime = new_dir->i_mtime = ctime;
|
||||
old_inode->i_ctime = ctime;
|
||||
new_inode->i_ctime = ctime;
|
||||
|
||||
if (old_dentry->d_parent != new_dentry->d_parent) {
|
||||
btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
|
||||
btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
|
||||
}
|
||||
|
||||
/* src is a subvolume */
|
||||
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
|
||||
ret = btrfs_unlink_subvol(trans, root, old_dir,
|
||||
root_objectid,
|
||||
old_dentry->d_name.name,
|
||||
old_dentry->d_name.len);
|
||||
} else { /* src is an inode */
|
||||
ret = __btrfs_unlink_inode(trans, root, old_dir,
|
||||
old_dentry->d_inode,
|
||||
old_dentry->d_name.name,
|
||||
old_dentry->d_name.len);
|
||||
if (!ret)
|
||||
ret = btrfs_update_inode(trans, root, old_inode);
|
||||
}
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
/* dest is a subvolume */
|
||||
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
|
||||
ret = btrfs_unlink_subvol(trans, dest, new_dir,
|
||||
root_objectid,
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len);
|
||||
} else { /* dest is an inode */
|
||||
ret = __btrfs_unlink_inode(trans, dest, new_dir,
|
||||
new_dentry->d_inode,
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len);
|
||||
if (!ret)
|
||||
ret = btrfs_update_inode(trans, dest, new_inode);
|
||||
}
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
ret = btrfs_add_link(trans, new_dir, old_inode,
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len, 0, old_idx);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
ret = btrfs_add_link(trans, old_dir, new_inode,
|
||||
old_dentry->d_name.name,
|
||||
old_dentry->d_name.len, 0, new_idx);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
if (old_inode->i_nlink == 1)
|
||||
BTRFS_I(old_inode)->dir_index = old_idx;
|
||||
if (new_inode->i_nlink == 1)
|
||||
BTRFS_I(new_inode)->dir_index = new_idx;
|
||||
|
||||
if (root_log_pinned) {
|
||||
parent = new_dentry->d_parent;
|
||||
btrfs_log_new_name(trans, old_inode, old_dir, parent);
|
||||
btrfs_end_log_trans(root);
|
||||
root_log_pinned = false;
|
||||
}
|
||||
if (dest_log_pinned) {
|
||||
parent = old_dentry->d_parent;
|
||||
btrfs_log_new_name(trans, new_inode, new_dir, parent);
|
||||
btrfs_end_log_trans(dest);
|
||||
dest_log_pinned = false;
|
||||
}
|
||||
out_fail:
|
||||
/*
|
||||
* If we have pinned a log and an error happened, we unpin tasks
|
||||
* trying to sync the log and force them to fallback to a transaction
|
||||
* commit if the log currently contains any of the inodes involved in
|
||||
* this rename operation (to ensure we do not persist a log with an
|
||||
* inconsistent state for any of these inodes or leading to any
|
||||
* inconsistencies when replayed). If the transaction was aborted, the
|
||||
* abortion reason is propagated to userspace when attempting to commit
|
||||
* the transaction. If the log does not contain any of these inodes, we
|
||||
* allow the tasks to sync it.
|
||||
*/
|
||||
if (ret && (root_log_pinned || dest_log_pinned)) {
|
||||
if (btrfs_inode_in_log(old_dir, root->fs_info->generation) ||
|
||||
btrfs_inode_in_log(new_dir, root->fs_info->generation) ||
|
||||
btrfs_inode_in_log(old_inode, root->fs_info->generation) ||
|
||||
(new_inode &&
|
||||
btrfs_inode_in_log(new_inode, root->fs_info->generation)))
|
||||
btrfs_set_log_full_commit(root->fs_info, trans);
|
||||
|
||||
if (root_log_pinned) {
|
||||
btrfs_end_log_trans(root);
|
||||
root_log_pinned = false;
|
||||
}
|
||||
if (dest_log_pinned) {
|
||||
btrfs_end_log_trans(dest);
|
||||
dest_log_pinned = false;
|
||||
}
|
||||
}
|
||||
ret = btrfs_end_transaction(trans, root);
|
||||
out_notrans:
|
||||
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
|
||||
up_read(&dest->fs_info->subvol_sem);
|
||||
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
|
||||
up_read(&root->fs_info->subvol_sem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct inode *dir,
|
||||
struct dentry *dentry)
|
||||
{
|
||||
int ret;
|
||||
struct inode *inode;
|
||||
u64 objectid;
|
||||
u64 index;
|
||||
|
||||
ret = btrfs_find_free_ino(root, &objectid);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
inode = btrfs_new_inode(trans, root, dir,
|
||||
dentry->d_name.name,
|
||||
dentry->d_name.len,
|
||||
btrfs_ino(dir),
|
||||
objectid,
|
||||
S_IFCHR | WHITEOUT_MODE,
|
||||
&index);
|
||||
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
return ret;
|
||||
}
|
||||
|
||||
inode->i_op = &btrfs_special_inode_operations;
|
||||
init_special_inode(inode, inode->i_mode,
|
||||
WHITEOUT_DEV);
|
||||
|
||||
ret = btrfs_init_inode_security(trans, inode, dir,
|
||||
&dentry->d_name);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = btrfs_add_nondir(trans, dir, dentry,
|
||||
inode, 0, index);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
out:
|
||||
unlock_new_inode(inode);
|
||||
if (ret)
|
||||
inode_dec_link_count(inode);
|
||||
iput(inode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
struct inode *new_dir, struct dentry *new_dentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
unsigned int trans_num_items;
|
||||
struct btrfs_root *root = BTRFS_I(old_dir)->root;
|
||||
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
|
||||
struct inode *new_inode = d_inode(new_dentry);
|
||||
struct inode *old_inode = d_inode(old_dentry);
|
||||
u64 index = 0;
|
||||
u64 root_objectid;
|
||||
int ret;
|
||||
u64 old_ino = btrfs_ino(old_inode);
|
||||
bool log_pinned = false;
|
||||
|
||||
if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
|
||||
return -EPERM;
|
||||
@@ -9449,15 +9744,21 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
* We want to reserve the absolute worst case amount of items. So if
|
||||
* both inodes are subvols and we need to unlink them then that would
|
||||
* require 4 item modifications, but if they are both normal inodes it
|
||||
* would require 5 item modifications, so we'll assume their normal
|
||||
* would require 5 item modifications, so we'll assume they are normal
|
||||
* inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
|
||||
* should cover the worst case number of items we'll modify.
|
||||
* If our rename has the whiteout flag, we need 5 more units for the
|
||||
* new inode (1 inode item, 1 inode ref, 2 dir items and 1 xattr item
|
||||
* when selinux is enabled).
|
||||
*/
|
||||
trans = btrfs_start_transaction(root, 11);
|
||||
trans_num_items = 11;
|
||||
if (flags & RENAME_WHITEOUT)
|
||||
trans_num_items += 5;
|
||||
trans = btrfs_start_transaction(root, trans_num_items);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_notrans;
|
||||
}
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_notrans;
|
||||
}
|
||||
|
||||
if (dest != root)
|
||||
btrfs_record_root_in_trans(trans, dest);
|
||||
@@ -9471,6 +9772,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
/* force full log commit if subvolume involved. */
|
||||
btrfs_set_log_full_commit(root->fs_info, trans);
|
||||
} else {
|
||||
btrfs_pin_log_trans(root);
|
||||
log_pinned = true;
|
||||
ret = btrfs_insert_inode_ref(trans, dest,
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len,
|
||||
@@ -9478,14 +9781,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
btrfs_ino(new_dir), index);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
/*
|
||||
* this is an ugly little race, but the rename is required
|
||||
* to make sure that if we crash, the inode is either at the
|
||||
* old name or the new one. pinning the log transaction lets
|
||||
* us make sure we don't allow a log commit to come in after
|
||||
* we unlink the name but before we add the new name back in.
|
||||
*/
|
||||
btrfs_pin_log_trans(root);
|
||||
}
|
||||
|
||||
inode_inc_iversion(old_dir);
|
||||
@@ -9552,12 +9847,46 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
if (old_inode->i_nlink == 1)
|
||||
BTRFS_I(old_inode)->dir_index = index;
|
||||
|
||||
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
|
||||
if (log_pinned) {
|
||||
struct dentry *parent = new_dentry->d_parent;
|
||||
|
||||
btrfs_log_new_name(trans, old_inode, old_dir, parent);
|
||||
btrfs_end_log_trans(root);
|
||||
log_pinned = false;
|
||||
}
|
||||
|
||||
if (flags & RENAME_WHITEOUT) {
|
||||
ret = btrfs_whiteout_for_rename(trans, root, old_dir,
|
||||
old_dentry);
|
||||
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
}
|
||||
out_fail:
|
||||
/*
|
||||
* If we have pinned the log and an error happened, we unpin tasks
|
||||
* trying to sync the log and force them to fallback to a transaction
|
||||
* commit if the log currently contains any of the inodes involved in
|
||||
* this rename operation (to ensure we do not persist a log with an
|
||||
* inconsistent state for any of these inodes or leading to any
|
||||
* inconsistencies when replayed). If the transaction was aborted, the
|
||||
* abortion reason is propagated to userspace when attempting to commit
|
||||
* the transaction. If the log does not contain any of these inodes, we
|
||||
* allow the tasks to sync it.
|
||||
*/
|
||||
if (ret && log_pinned) {
|
||||
if (btrfs_inode_in_log(old_dir, root->fs_info->generation) ||
|
||||
btrfs_inode_in_log(new_dir, root->fs_info->generation) ||
|
||||
btrfs_inode_in_log(old_inode, root->fs_info->generation) ||
|
||||
(new_inode &&
|
||||
btrfs_inode_in_log(new_inode, root->fs_info->generation)))
|
||||
btrfs_set_log_full_commit(root->fs_info, trans);
|
||||
|
||||
btrfs_end_log_trans(root);
|
||||
log_pinned = false;
|
||||
}
|
||||
btrfs_end_transaction(trans, root);
|
||||
out_notrans:
|
||||
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
|
||||
@@ -9570,10 +9899,14 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
|
||||
struct inode *new_dir, struct dentry *new_dentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
if (flags & ~RENAME_NOREPLACE)
|
||||
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
|
||||
return -EINVAL;
|
||||
|
||||
return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
|
||||
if (flags & RENAME_EXCHANGE)
|
||||
return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
|
||||
new_dentry);
|
||||
|
||||
return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
|
||||
}
|
||||
|
||||
static void btrfs_run_delalloc_work(struct btrfs_work *work)
|
||||
@@ -9942,6 +10275,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
btrfs_end_transaction(trans, root);
|
||||
break;
|
||||
}
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
|
||||
last_alloc = ins.offset;
|
||||
ret = insert_reserved_file_extent(trans, inode,
|
||||
|
@@ -690,7 +690,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
|
||||
if (ret)
|
||||
goto dec_and_free;
|
||||
|
||||
btrfs_wait_ordered_extents(root, -1);
|
||||
btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
|
||||
|
||||
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
|
||||
BTRFS_BLOCK_RSV_TEMP);
|
||||
|
@@ -661,14 +661,15 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
|
||||
* wait for all the ordered extents in a root. This is done when balancing
|
||||
* space between drives.
|
||||
*/
|
||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
|
||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
|
||||
const u64 range_start, const u64 range_len)
|
||||
{
|
||||
struct list_head splice, works;
|
||||
LIST_HEAD(splice);
|
||||
LIST_HEAD(skipped);
|
||||
LIST_HEAD(works);
|
||||
struct btrfs_ordered_extent *ordered, *next;
|
||||
int count = 0;
|
||||
|
||||
INIT_LIST_HEAD(&splice);
|
||||
INIT_LIST_HEAD(&works);
|
||||
const u64 range_end = range_start + range_len;
|
||||
|
||||
mutex_lock(&root->ordered_extent_mutex);
|
||||
spin_lock(&root->ordered_extent_lock);
|
||||
@@ -676,6 +677,14 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
|
||||
while (!list_empty(&splice) && nr) {
|
||||
ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
|
||||
root_extent_list);
|
||||
|
||||
if (range_end <= ordered->start ||
|
||||
ordered->start + ordered->disk_len <= range_start) {
|
||||
list_move_tail(&ordered->root_extent_list, &skipped);
|
||||
cond_resched_lock(&root->ordered_extent_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
list_move_tail(&ordered->root_extent_list,
|
||||
&root->ordered_extents);
|
||||
atomic_inc(&ordered->refs);
|
||||
@@ -694,6 +703,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
|
||||
nr--;
|
||||
count++;
|
||||
}
|
||||
list_splice_tail(&skipped, &root->ordered_extents);
|
||||
list_splice_tail(&splice, &root->ordered_extents);
|
||||
spin_unlock(&root->ordered_extent_lock);
|
||||
|
||||
@@ -708,7 +718,8 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
|
||||
return count;
|
||||
}
|
||||
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||
const u64 range_start, const u64 range_len)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct list_head splice;
|
||||
@@ -728,7 +739,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
|
||||
&fs_info->ordered_roots);
|
||||
spin_unlock(&fs_info->ordered_root_lock);
|
||||
|
||||
done = btrfs_wait_ordered_extents(root, nr);
|
||||
done = btrfs_wait_ordered_extents(root, nr,
|
||||
range_start, range_len);
|
||||
btrfs_put_fs_root(root);
|
||||
|
||||
spin_lock(&fs_info->ordered_root_lock);
|
||||
|
@@ -197,8 +197,10 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
|
||||
struct btrfs_ordered_extent *ordered);
|
||||
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
|
||||
u32 *sum, int len);
|
||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
|
||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
void btrfs_get_logged_extents(struct inode *inode,
|
||||
struct list_head *logged_list,
|
||||
const loff_t start,
|
||||
|
@@ -4254,12 +4254,11 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
|
||||
btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu",
|
||||
rc->block_group->key.objectid, rc->block_group->flags);
|
||||
|
||||
ret = btrfs_start_delalloc_roots(fs_info, 0, -1);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out;
|
||||
}
|
||||
btrfs_wait_ordered_roots(fs_info, -1);
|
||||
btrfs_wait_block_group_reservations(rc->block_group);
|
||||
btrfs_wait_nocow_writers(rc->block_group);
|
||||
btrfs_wait_ordered_roots(fs_info, -1,
|
||||
rc->block_group->key.objectid,
|
||||
rc->block_group->key.offset);
|
||||
|
||||
while (1) {
|
||||
mutex_lock(&fs_info->cleaner_mutex);
|
||||
|
@@ -1156,7 +1156,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
|
||||
return 0;
|
||||
}
|
||||
|
||||
btrfs_wait_ordered_roots(fs_info, -1);
|
||||
btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
|
||||
|
||||
trans = btrfs_attach_transaction_barrier(root);
|
||||
if (IS_ERR(trans)) {
|
||||
|
@@ -1907,7 +1907,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
|
||||
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
|
||||
btrfs_wait_ordered_roots(fs_info, -1);
|
||||
btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@@ -4141,6 +4141,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
|
||||
|
||||
INIT_LIST_HEAD(&extents);
|
||||
|
||||
down_write(&BTRFS_I(inode)->dio_sem);
|
||||
write_lock(&tree->lock);
|
||||
test_gen = root->fs_info->last_trans_committed;
|
||||
|
||||
@@ -4169,13 +4170,20 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
list_sort(NULL, &extents, extent_cmp);
|
||||
/*
|
||||
* Collect any new ordered extents within the range. This is to
|
||||
* prevent logging file extent items without waiting for the disk
|
||||
* location they point to being written. We do this only to deal
|
||||
* with races against concurrent lockless direct IO writes.
|
||||
*/
|
||||
btrfs_get_logged_extents(inode, logged_list, start, end);
|
||||
/*
|
||||
* Some ordered extents started by fsync might have completed
|
||||
* before we could collect them into the list logged_list, which
|
||||
* means they're gone, not in our logged_list nor in the inode's
|
||||
* ordered tree. We want the application/user space to know an
|
||||
* error happened while attempting to persist file data so that
|
||||
* it can take proper action. If such error happened, we leave
|
||||
* without writing to the log tree and the fsync must report the
|
||||
* file data write error and not commit the current transaction.
|
||||
*/
|
||||
ret = btrfs_inode_check_errors(inode);
|
||||
if (ret)
|
||||
ctx->io_err = ret;
|
||||
process:
|
||||
while (!list_empty(&extents)) {
|
||||
em = list_entry(extents.next, struct extent_map, list);
|
||||
@@ -4202,6 +4210,7 @@ process:
|
||||
}
|
||||
WARN_ON(!list_empty(&extents));
|
||||
write_unlock(&tree->lock);
|
||||
up_write(&BTRFS_I(inode)->dio_sem);
|
||||
|
||||
btrfs_release_path(path);
|
||||
return ret;
|
||||
@@ -4622,23 +4631,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
|
||||
mutex_lock(&BTRFS_I(inode)->log_mutex);
|
||||
|
||||
/*
|
||||
* Collect ordered extents only if we are logging data. This is to
|
||||
* ensure a subsequent request to log this inode in LOG_INODE_ALL mode
|
||||
* will process the ordered extents if they still exist at the time,
|
||||
* because when we collect them we test and set for the flag
|
||||
* BTRFS_ORDERED_LOGGED to prevent multiple log requests to process the
|
||||
* same ordered extents. The consequence for the LOG_INODE_ALL log mode
|
||||
* not processing the ordered extents is that we end up logging the
|
||||
* corresponding file extent items, based on the extent maps in the
|
||||
* inode's extent_map_tree's modified_list, without logging the
|
||||
* respective checksums (since they may still be only attached to the
|
||||
* ordered extents and have not been inserted in the csum tree by
|
||||
* btrfs_finish_ordered_io() yet).
|
||||
*/
|
||||
if (inode_only == LOG_INODE_ALL)
|
||||
btrfs_get_logged_extents(inode, &logged_list, start, end);
|
||||
|
||||
/*
|
||||
* a brute force approach to making sure we get the most uptodate
|
||||
* copies of everything.
|
||||
@@ -4846,21 +4838,6 @@ log_extents:
|
||||
goto out_unlock;
|
||||
}
|
||||
if (fast_search) {
|
||||
/*
|
||||
* Some ordered extents started by fsync might have completed
|
||||
* before we collected the ordered extents in logged_list, which
|
||||
* means they're gone, not in our logged_list nor in the inode's
|
||||
* ordered tree. We want the application/user space to know an
|
||||
* error happened while attempting to persist file data so that
|
||||
* it can take proper action. If such error happened, we leave
|
||||
* without writing to the log tree and the fsync must report the
|
||||
* file data write error and not commit the current transaction.
|
||||
*/
|
||||
err = btrfs_inode_check_errors(inode);
|
||||
if (err) {
|
||||
ctx->io_err = err;
|
||||
goto out_unlock;
|
||||
}
|
||||
ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
|
||||
&logged_list, ctx, start, end);
|
||||
if (ret) {
|
||||
@@ -5158,7 +5135,7 @@ process_leaf:
|
||||
}
|
||||
|
||||
ctx->log_new_dentries = false;
|
||||
if (type == BTRFS_FT_DIR)
|
||||
if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
|
||||
log_mode = LOG_INODE_ALL;
|
||||
btrfs_release_path(path);
|
||||
ret = btrfs_log_inode(trans, root, di_inode,
|
||||
@@ -5278,11 +5255,16 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
|
||||
if (IS_ERR(dir_inode))
|
||||
continue;
|
||||
|
||||
if (ctx)
|
||||
ctx->log_new_dentries = false;
|
||||
ret = btrfs_log_inode(trans, root, dir_inode,
|
||||
LOG_INODE_ALL, 0, LLONG_MAX, ctx);
|
||||
if (!ret &&
|
||||
btrfs_must_commit_transaction(trans, dir_inode))
|
||||
ret = 1;
|
||||
if (!ret && ctx && ctx->log_new_dentries)
|
||||
ret = log_new_dir_dentries(trans, root,
|
||||
dir_inode, ctx);
|
||||
iput(dir_inode);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -5652,11 +5634,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
|
||||
* into the file. When the file is logged we check it and
|
||||
* don't log the parents if the file is fully on disk.
|
||||
*/
|
||||
if (S_ISREG(inode->i_mode)) {
|
||||
mutex_lock(&BTRFS_I(inode)->log_mutex);
|
||||
BTRFS_I(inode)->last_unlink_trans = trans->transid;
|
||||
mutex_unlock(&BTRFS_I(inode)->log_mutex);
|
||||
}
|
||||
mutex_lock(&BTRFS_I(inode)->log_mutex);
|
||||
BTRFS_I(inode)->last_unlink_trans = trans->transid;
|
||||
mutex_unlock(&BTRFS_I(inode)->log_mutex);
|
||||
|
||||
/*
|
||||
* if this directory was already logged any new
|
||||
|
Reference in New Issue
Block a user