Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason: "Most of these are fixing extent reservation accounting, or corners with tree writeback during commit. Josef's set does add a test, which isn't strictly a fix, but it'll keep us from making this same mistake again" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: Btrfs: fix outstanding_extents accounting in DIO Btrfs: add sanity test for outstanding_extents accounting Btrfs: just free dummy extent buffers Btrfs: account merges/splits properly Btrfs: prepare block group cache before writing Btrfs: fix ASSERT(list_empty(&cur_trans->dirty_bgs_list) Btrfs: account for the correct number of extents for delalloc reservations Btrfs: fix merge delalloc logic Btrfs: fix comp_oper to get right order Btrfs: catch transaction abortion after waiting for it btrfs: fix sizeof format specifier in btrfs_check_super_valid()
This commit is contained in:
112
fs/btrfs/inode.c
112
fs/btrfs/inode.c
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
|
||||
|
||||
static int btrfs_dirty_inode(struct inode *inode);
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
/*
 * Sanity-test helper, only built when CONFIG_BTRFS_FS_RUN_SANITY_TESTS is
 * defined: point the io_tree of @inode at btrfs_extent_io_ops.
 * NOTE(review): presumably this lets test-created inodes exercise the
 * extent hooks defined in this file -- confirm against the test callers.
 */
void btrfs_test_inode_set_ops(struct inode *inode)
|
||||
{
|
||||
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode, struct inode *dir,
|
||||
const struct qstr *qstr)
|
||||
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
|
||||
u64 new_size;
|
||||
|
||||
/*
|
||||
* We need the largest size of the remaining extent to see if we
|
||||
* need to add a new outstanding extent. Think of the following
|
||||
* case
|
||||
*
|
||||
* [MAX_EXTENT_SIZEx2 - 4k][4k]
|
||||
*
|
||||
* The new_size would just be 4k and we'd think we had enough
|
||||
* outstanding extents for this if we only took one side of the
|
||||
* split, same goes for the other direction. We need to see if
|
||||
* the larger size still is the same amount of extents as the
|
||||
* original size, because if it is we need to add a new
|
||||
* outstanding extent. But if we split up and the larger size
|
||||
* is less than the original then we are good to go since we've
|
||||
* already accounted for the extra extent in our original
|
||||
* accounting.
|
||||
* See the explanation in btrfs_merge_extent_hook, the same
|
||||
* applies here, just in reverse.
|
||||
*/
|
||||
new_size = orig->end - split + 1;
|
||||
if ((split - orig->start) > new_size)
|
||||
new_size = split - orig->start;
|
||||
|
||||
num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
|
||||
num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
|
||||
BTRFS_MAX_EXTENT_SIZE);
|
||||
if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
|
||||
BTRFS_MAX_EXTENT_SIZE) < num_extents)
|
||||
new_size = split - orig->start;
|
||||
num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
|
||||
BTRFS_MAX_EXTENT_SIZE);
|
||||
if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
|
||||
BTRFS_MAX_EXTENT_SIZE) >= num_extents)
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
|
||||
if (!(other->state & EXTENT_DELALLOC))
|
||||
return;
|
||||
|
||||
old_size = other->end - other->start + 1;
|
||||
new_size = old_size + (new->end - new->start + 1);
|
||||
if (new->start > other->start)
|
||||
new_size = new->end - other->start + 1;
|
||||
else
|
||||
new_size = other->end - new->start + 1;
|
||||
|
||||
/* we're not bigger than the max, unreserve the space and go */
|
||||
if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
|
||||
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
|
||||
}
|
||||
|
||||
/*
|
||||
* If we grew by another max_extent, just return, we want to keep that
|
||||
* reserved amount.
|
||||
* We have to add up either side to figure out how many extents were
|
||||
* accounted for before we merged into one big extent. If the number of
|
||||
* extents we accounted for is <= the amount we need for the new range
|
||||
* then we can return, otherwise drop. Think of it like this
|
||||
*
|
||||
* [ 4k][MAX_SIZE]
|
||||
*
|
||||
* So we've grown the extent by a MAX_SIZE extent, this would mean we
|
||||
* need 2 outstanding extents, on one side we have 1 and the other side
|
||||
* we have 1 so they are == and we can return. But in this case
|
||||
*
|
||||
* [MAX_SIZE+4k][MAX_SIZE+4k]
|
||||
*
|
||||
* Each range on their own accounts for 2 extents, but merged together
|
||||
* they are only 3 extents worth of accounting, so we need to drop in
|
||||
* this case.
|
||||
*/
|
||||
old_size = other->end - other->start + 1;
|
||||
num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
|
||||
BTRFS_MAX_EXTENT_SIZE);
|
||||
old_size = new->end - new->start + 1;
|
||||
num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
|
||||
BTRFS_MAX_EXTENT_SIZE);
|
||||
|
||||
if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
|
||||
BTRFS_MAX_EXTENT_SIZE) > num_extents)
|
||||
BTRFS_MAX_EXTENT_SIZE) >= num_extents)
|
||||
return;
|
||||
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
|
||||
/* For sanity tests */
|
||||
if (btrfs_test_is_dummy_root(root))
|
||||
return;
|
||||
|
||||
__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
|
||||
root->fs_info->delalloc_batch);
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
|
||||
root != root->fs_info->tree_root)
|
||||
btrfs_delalloc_release_metadata(inode, len);
|
||||
|
||||
/* For sanity tests. */
|
||||
if (btrfs_test_is_dummy_root(root))
|
||||
return;
|
||||
|
||||
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
|
||||
&& do_list && !(state->state & EXTENT_NORESERVE))
|
||||
btrfs_free_reserved_data_space(inode, len);
|
||||
@@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
||||
u64 start = iblock << inode->i_blkbits;
|
||||
u64 lockstart, lockend;
|
||||
u64 len = bh_result->b_size;
|
||||
u64 orig_len = len;
|
||||
u64 *outstanding_extents = NULL;
|
||||
int unlock_bits = EXTENT_LOCKED;
|
||||
int ret = 0;
|
||||
|
||||
@@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
||||
lockstart = start;
|
||||
lockend = start + len - 1;
|
||||
|
||||
if (current->journal_info) {
|
||||
/*
|
||||
* Need to pull our outstanding extents and set journal_info to NULL so
|
||||
* that anything that needs to check if there's a transaction doesn't get
|
||||
* confused.
|
||||
*/
|
||||
outstanding_extents = current->journal_info;
|
||||
current->journal_info = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this errors out it's because we couldn't invalidate pagecache for
|
||||
* this range and we need to fallback to buffered.
|
||||
@@ -7348,11 +7381,20 @@ unlock:
|
||||
if (start + len > i_size_read(inode))
|
||||
i_size_write(inode, start + len);
|
||||
|
||||
if (len < orig_len) {
|
||||
/*
|
||||
* If we have an outstanding_extents count still set then we're
|
||||
* within our reservation, otherwise we need to adjust our inode
|
||||
* counter appropriately.
|
||||
*/
|
||||
if (*outstanding_extents) {
|
||||
(*outstanding_extents)--;
|
||||
} else {
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
|
||||
current->journal_info = outstanding_extents;
|
||||
btrfs_free_reserved_data_space(inode, len);
|
||||
}
|
||||
|
||||
@@ -7376,6 +7418,8 @@ unlock:
|
||||
unlock_err:
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
|
||||
unlock_bits, 1, 0, &cached_state, GFP_NOFS);
|
||||
if (outstanding_extents)
|
||||
current->journal_info = outstanding_extents;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -8075,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
u64 outstanding_extents = 0;
|
||||
size_t count = 0;
|
||||
int flags = 0;
|
||||
bool wakeup = true;
|
||||
@@ -8112,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
|
||||
ret = btrfs_delalloc_reserve_space(inode, count);
|
||||
if (ret)
|
||||
goto out;
|
||||
outstanding_extents = div64_u64(count +
|
||||
BTRFS_MAX_EXTENT_SIZE - 1,
|
||||
BTRFS_MAX_EXTENT_SIZE);
|
||||
|
||||
/*
|
||||
* We need to know how many extents we reserved so that we can
|
||||
* do the accounting properly if we go over the number we
|
||||
* originally calculated. Abuse current->journal_info for this.
|
||||
*/
|
||||
current->journal_info = &outstanding_extents;
|
||||
} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
|
||||
&BTRFS_I(inode)->runtime_flags)) {
|
||||
inode_dio_done(inode);
|
||||
@@ -8124,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
|
||||
iter, offset, btrfs_get_blocks_direct, NULL,
|
||||
btrfs_submit_direct, flags);
|
||||
if (rw & WRITE) {
|
||||
current->journal_info = NULL;
|
||||
if (ret < 0 && ret != -EIOCBQUEUED)
|
||||
btrfs_delalloc_release_space(inode, count);
|
||||
else if (ret >= 0 && (size_t)ret < count)
|
||||
|
Reference in New Issue
Block a user