Btrfs: add better -ENOSPC handling

This is a step in the direction of better -ENOSPC handling.  Instead of
checking the global bytes counter we check the space_info bytes counters to
make sure we have enough space.

If we don't, we go ahead and try to allocate a new chunk, and if that fails
we return -ENOSPC.  This patch adds two counters to btrfs_space_info:
bytes_delalloc and bytes_may_use.

bytes_delalloc accounts for extents we've actually set up for delalloc and
that will be allocated at some point down the line.

bytes_may_use keeps track of how many bytes we may use for delalloc at
some point.  When we actually set the extent_bit for the delalloc bytes we
subtract the reserved bytes from the bytes_may_use counter.  This ensures that
we are actually able to allocate space for any delalloc bytes.

Signed-off-by: Josef Bacik <jbacik@redhat.com>
This commit is contained in:
Josef Bacik
2009-02-20 11:00:09 -05:00
committed by Chris Mason
parent 2cfbd50b53
commit 6a63209fc0
6 changed files with 271 additions and 76 deletions

View File

@@ -60,6 +60,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
return (cache->flags & bits) == bits;
@@ -1909,6 +1913,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->bytes_readonly = 0;
found->bytes_delalloc = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
@@ -1972,6 +1977,196 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
return flags;
}
/*
 * Build the block group flags to allocate from for @root.
 *
 * A non-zero @data selects the data profile.  Otherwise the chunk root gets
 * the system profile and every other root gets the metadata profile.  In
 * each case the configured profile is masked by the matching avail_*_bits
 * field, or'ed with the block group type, and then passed through
 * btrfs_reduce_alloc_profile().
 */
static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 flags;

	if (data) {
		flags = BTRFS_BLOCK_GROUP_DATA |
			(fs_info->avail_data_alloc_bits &
			 fs_info->data_alloc_profile);
	} else if (root == fs_info->chunk_root) {
		flags = BTRFS_BLOCK_GROUP_SYSTEM |
			(fs_info->avail_system_alloc_bits &
			 fs_info->system_alloc_profile);
	} else {
		flags = BTRFS_BLOCK_GROUP_METADATA |
			(fs_info->avail_metadata_alloc_bits &
			 fs_info->metadata_alloc_profile);
	}

	return btrfs_reduce_alloc_profile(root, flags);
}
/*
 * Look up the space_info that matches the data allocation profile of @root
 * and store it in the btrfs inode, so later space checks can read it from
 * BTRFS_I(inode)->space_info.
 */
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
{
	u64 data_flags = btrfs_get_alloc_profile(root, 1);

	BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
						       data_flags);
}
/*
 * Check that at least 5% of the metadata space is still free.
 *
 * Returns 0 when the metadata space_info is not marked full, or when the
 * sum of used, reserved, pinned and readonly bytes is at most 95% of
 * total_bytes.  Returns -ENOSPC otherwise.
 */
int btrfs_check_metadata_free_space(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_space_info *sinfo;
	u64 flags, limit, in_use;
	int ret = 0;

	/* find the space info the metadata for this root is allocated from */
	flags = btrfs_get_alloc_profile(root, 0);
	sinfo = __find_space_info(fs_info, flags);

	/*
	 * a space info that isn't full can still get a new chunk, so how
	 * much of it is in use doesn't matter yet
	 */
	if (!sinfo->full)
		return 0;

	spin_lock(&sinfo->lock);
	limit = sinfo->total_bytes * 95;
	do_div(limit, 100);

	in_use = sinfo->bytes_used + sinfo->bytes_reserved +
		 sinfo->bytes_pinned + sinfo->bytes_readonly;
	if (in_use > limit)
		ret = -ENOSPC;
	spin_unlock(&sinfo->lock);

	return ret;
}
/*
 * Reserve @bytes of data space for @inode in the space_info the inode
 * allocates from.
 *
 * @bytes is rounded up to a multiple of root->sectorsize.  If the space_info
 * does not have that much unaccounted space and is not marked full, a new
 * chunk is allocated inside a transaction and the check is retried.  On
 * success the rounded size is added to both the space_info's bytes_may_use
 * and the inode's reserved_bytes.
 *
 * Returns the result of btrfs_check_metadata_free_space() once the data
 * reservation has been recorded, -ENOMEM if no transaction could be started,
 * the error from do_chunk_alloc() if chunk allocation failed, or -ENOSPC if
 * the space_info is full and cannot satisfy the request.
 *
 * NOTE(review): when the final metadata check fails, the data reservation
 * made just above it is left in place; confirm that callers release it with
 * btrfs_free_reserved_data_space() on that error path.
 */
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
				u64 bytes)
{
	struct btrfs_space_info *data_sinfo;
	int ret = 0;

	/* make sure bytes are sectorsize aligned */
	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

	data_sinfo = BTRFS_I(inode)->space_info;
again:
	/* make sure we have enough space to handle the data first */
	spin_lock(&data_sinfo->lock);
	if (data_sinfo->total_bytes - data_sinfo->bytes_used -
	    data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
	    data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
	    data_sinfo->bytes_may_use < bytes) {
		/*
		 * if we don't have enough free bytes in this space then we
		 * need to alloc a new chunk.
		 */
		if (!data_sinfo->full) {
			u64 alloc_target;
			struct btrfs_trans_handle *trans;

			data_sinfo->force_alloc = 1;
			/* drop the lock before starting a transaction */
			spin_unlock(&data_sinfo->lock);

			alloc_target = btrfs_get_alloc_profile(root, 1);
			trans = btrfs_start_transaction(root, 1);
			if (!trans)
				return -ENOMEM;

			/* ask for the request plus 2MB of extra room */
			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
					     bytes + 2 * 1024 * 1024,
					     alloc_target, 0);
			btrfs_end_transaction(trans, root);
			if (ret)
				return ret;
			goto again;
		}
		spin_unlock(&data_sinfo->lock);

		/*
		 * The counters are read after the lock was dropped, so this
		 * is a best-effort snapshot for diagnostics only.  The casts
		 * keep the arguments matching %llu whatever u64 is defined as.
		 */
		printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
		       ", %llu bytes_used, %llu bytes_reserved, "
		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use, "
		       "%llu total\n",
		       (unsigned long long)bytes,
		       (unsigned long long)data_sinfo->bytes_delalloc,
		       (unsigned long long)data_sinfo->bytes_used,
		       (unsigned long long)data_sinfo->bytes_reserved,
		       (unsigned long long)data_sinfo->bytes_pinned,
		       (unsigned long long)data_sinfo->bytes_readonly,
		       (unsigned long long)data_sinfo->bytes_may_use,
		       (unsigned long long)data_sinfo->total_bytes);
		return -ENOSPC;
	}

	/* record the reservation against both the space info and the inode */
	data_sinfo->bytes_may_use += bytes;
	BTRFS_I(inode)->reserved_bytes += bytes;
	spin_unlock(&data_sinfo->lock);

	return btrfs_check_metadata_free_space(root);
}
/*
 * Undo a reservation made by btrfs_check_data_free_space() when something
 * failed afterwards.
 *
 * @bytes is rounded up to a multiple of root->sectorsize, then subtracted
 * from both the space_info's bytes_may_use and the inode's reserved_bytes
 * under the space_info lock.
 */
void btrfs_free_reserved_data_space(struct btrfs_root *root,
				    struct inode *inode, u64 bytes)
{
	struct btrfs_space_info *sinfo = BTRFS_I(inode)->space_info;
	u64 aligned;

	/* round up to the next sectorsize boundary */
	aligned = (bytes + root->sectorsize - 1) &
		  ~((u64)root->sectorsize - 1);

	spin_lock(&sinfo->lock);
	sinfo->bytes_may_use -= aligned;
	BTRFS_I(inode)->reserved_bytes -= aligned;
	spin_unlock(&sinfo->lock);
}
/*
 * Called when a delalloc extent is added to the inode's io_tree.
 *
 * Adds @bytes to the space_info's bytes_delalloc and moves the matching
 * amount out of the earlier reservation: bytes_may_use and the inode's
 * reserved_bytes both shrink by @bytes, clamped to what the inode still has
 * reserved.
 */
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
				  u64 bytes)
{
	/* the space info this inode stores its data in */
	struct btrfs_space_info *sinfo = BTRFS_I(inode)->space_info;
	u64 covered = bytes;

	spin_lock(&sinfo->lock);
	sinfo->bytes_delalloc += bytes;

	/*
	 * A delalloc extent can be added without a prior call to
	 * btrfs_check_data_free_space (a weird writepage condition).  In
	 * that case only give back what the inode actually has reserved, so
	 * the counters don't go below zero.
	 */
	if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes))
		covered = BTRFS_I(inode)->reserved_bytes;

	sinfo->bytes_may_use -= covered;
	BTRFS_I(inode)->reserved_bytes -= covered;
	spin_unlock(&sinfo->lock);
}
/*
 * Called when a delalloc extent is cleared from the inode's io_tree.
 *
 * Subtracts @bytes from bytes_delalloc of the inode's space_info, under the
 * space_info lock.
 */
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
			       u64 bytes)
{
	struct btrfs_space_info *sinfo = BTRFS_I(inode)->space_info;

	spin_lock(&sinfo->lock);
	sinfo->bytes_delalloc -= bytes;
	spin_unlock(&sinfo->lock);
}
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force)
@@ -3105,6 +3300,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
(unsigned long long)(info->total_bytes - info->bytes_used -
info->bytes_pinned - info->bytes_reserved),
(info->full) ? "" : "not ");
printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
" may_use=%llu, used=%llu\n", info->total_bytes,
info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
info->bytes_used);
down_read(&info->groups_sem);
list_for_each_entry(cache, &info->block_groups, list) {
@@ -3131,24 +3330,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
{
int ret;
u64 search_start = 0;
u64 alloc_profile;
struct btrfs_fs_info *info = root->fs_info;
if (data) {
alloc_profile = info->avail_data_alloc_bits &
info->data_alloc_profile;
data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
} else if (root == root->fs_info->chunk_root) {
alloc_profile = info->avail_system_alloc_bits &
info->system_alloc_profile;
data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
} else {
alloc_profile = info->avail_metadata_alloc_bits &
info->metadata_alloc_profile;
data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
}
data = btrfs_get_alloc_profile(root, data);
again:
data = btrfs_reduce_alloc_profile(root, data);
/*
* the only place that sets empty_size is btrfs_realloc_node, which
* is not called recursively on allocations