Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "Most of these are fixing extent reservation accounting, or corners
  with tree writeback during commit.

  Josef's set does add a test, which isn't strictly a fix, but it'll
  keep us from making this same mistake again"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: fix outstanding_extents accounting in DIO
  Btrfs: add sanity test for outstanding_extents accounting
  Btrfs: just free dummy extent buffers
  Btrfs: account merges/splits properly
  Btrfs: prepare block group cache before writing
  Btrfs: fix ASSERT(list_empty(&cur_trans->dirty_bgs_list)
  Btrfs: account for the correct number of extents for delalloc reservations
  Btrfs: fix merge delalloc logic
  Btrfs: fix comp_oper to get right order
  Btrfs: catch transaction abortion after waiting for it
  btrfs: fix sizeof format specifier in btrfs_check_super_valid()
This commit is contained in:
Linus Torvalds
2015-03-21 10:53:37 -07:00
8 changed files with 352 additions and 46 deletions

View File

@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
static int btrfs_dirty_inode(struct inode *inode);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
void btrfs_test_inode_set_ops(struct inode *inode)
{
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
#endif
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
const struct qstr *qstr)
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
u64 new_size;
/*
* We need the largest size of the remaining extent to see if we
* need to add a new outstanding extent. Think of the following
* case
*
* [MEAX_EXTENT_SIZEx2 - 4k][4k]
*
* The new_size would just be 4k and we'd think we had enough
* outstanding extents for this if we only took one side of the
* split, same goes for the other direction. We need to see if
* the larger size still is the same amount of extents as the
* original size, because if it is we need to add a new
* outstanding extent. But if we split up and the larger size
* is less than the original then we are good to go since we've
* already accounted for the extra extent in our original
* accounting.
* See the explanation in btrfs_merge_extent_hook, the same
* applies here, just in reverse.
*/
new_size = orig->end - split + 1;
if ((split - orig->start) > new_size)
new_size = split - orig->start;
num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE) < num_extents)
new_size = split - orig->start;
num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE) >= num_extents)
return;
}
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
if (!(other->state & EXTENT_DELALLOC))
return;
old_size = other->end - other->start + 1;
new_size = old_size + (new->end - new->start + 1);
if (new->start > other->start)
new_size = new->end - other->start + 1;
else
new_size = other->end - new->start + 1;
/* we're not bigger than the max, unreserve the space and go */
if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
}
/*
* If we grew by another max_extent, just return, we want to keep that
* reserved amount.
* We have to add up either side to figure out how many extents were
* accounted for before we merged into one big extent. If the number of
* extents we accounted for is <= the amount we need for the new range
* then we can return, otherwise drop. Think of it like this
*
* [ 4k][MAX_SIZE]
*
* So we've grown the extent by a MAX_SIZE extent, this would mean we
* need 2 outstanding extents, on one side we have 1 and the other side
* we have 1 so they are == and we can return. But in this case
*
* [MAX_SIZE+4k][MAX_SIZE+4k]
*
* Each range on their own accounts for 2 extents, but merged together
* they are only 3 extents worth of accounting, so we need to drop in
* this case.
*/
old_size = other->end - other->start + 1;
num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
old_size = new->end - new->start + 1;
num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE) > num_extents)
BTRFS_MAX_EXTENT_SIZE) >= num_extents)
return;
spin_lock(&BTRFS_I(inode)->lock);
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
spin_unlock(&BTRFS_I(inode)->lock);
}
/* For sanity tests */
if (btrfs_test_is_dummy_root(root))
return;
__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
root->fs_info->delalloc_batch);
spin_lock(&BTRFS_I(inode)->lock);
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
root != root->fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, len);
/* For sanity tests. */
if (btrfs_test_is_dummy_root(root))
return;
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
&& do_list && !(state->state & EXTENT_NORESERVE))
btrfs_free_reserved_data_space(inode, len);
@@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend;
u64 len = bh_result->b_size;
u64 orig_len = len;
u64 *outstanding_extents = NULL;
int unlock_bits = EXTENT_LOCKED;
int ret = 0;
@@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
lockstart = start;
lockend = start + len - 1;
if (current->journal_info) {
/*
* Need to pull our outstanding extents and set journal_info to NULL so
* that anything that needs to check if there's a transction doesn't get
* confused.
*/
outstanding_extents = current->journal_info;
current->journal_info = NULL;
}
/*
* If this errors out it's because we couldn't invalidate pagecache for
* this range and we need to fallback to buffered.
@@ -7348,11 +7381,20 @@ unlock:
if (start + len > i_size_read(inode))
i_size_write(inode, start + len);
if (len < orig_len) {
/*
* If we have an outstanding_extents count still set then we're
* within our reservation, otherwise we need to adjust our inode
* counter appropriately.
*/
if (*outstanding_extents) {
(*outstanding_extents)--;
} else {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
current->journal_info = outstanding_extents;
btrfs_free_reserved_data_space(inode, len);
}
@@ -7376,6 +7418,8 @@ unlock:
unlock_err:
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
unlock_bits, 1, 0, &cached_state, GFP_NOFS);
if (outstanding_extents)
current->journal_info = outstanding_extents;
return ret;
}
@@ -8075,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
u64 outstanding_extents = 0;
size_t count = 0;
int flags = 0;
bool wakeup = true;
@@ -8112,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
ret = btrfs_delalloc_reserve_space(inode, count);
if (ret)
goto out;
outstanding_extents = div64_u64(count +
BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
/*
* We need to know how many extents we reserved so that we can
* do the accounting properly if we go over the number we
* originally calculated. Abuse current->journal_info for this.
*/
current->journal_info = &outstanding_extents;
} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags)) {
inode_dio_done(inode);
@@ -8124,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
iter, offset, btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, flags);
if (rw & WRITE) {
current->journal_info = NULL;
if (ret < 0 && ret != -EIOCBQUEUED)
btrfs_delalloc_release_space(inode, count);
else if (ret >= 0 && (size_t)ret < count)