Btrfs: add better -ENOSPC handling
This is a step in the direction of better -ENOSPC handling. Instead of checking the global bytes counter, we check the space_info bytes counters to make sure we have enough space. If we don't, we go ahead and try to allocate a new chunk, and if that fails we return -ENOSPC. This patch adds two counters to btrfs_space_info: bytes_delalloc and bytes_may_use. bytes_delalloc accounts for extents we've actually set up for delalloc and that will be allocated at some point down the line. bytes_may_use keeps track of how many bytes we may use for delalloc at some point. When we actually set the extent_bit for the delalloc bytes, we subtract the reserved bytes from the bytes_may_use counter. This ensures that space is always available when the delalloc bytes are finally allocated. Signed-off-by: Josef Bacik <jbacik@redhat.com>
This commit is contained in:
@@ -60,6 +60,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, int alloc,
|
||||
int mark_free);
|
||||
|
||||
/*
 * Forward declaration: do_chunk_alloc() is defined further down in this file
 * but is called earlier, from btrfs_check_data_free_space().
 */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *extent_root, u64 alloc_bytes,
|
||||
u64 flags, int force);
|
||||
|
||||
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
|
||||
{
|
||||
return (cache->flags & bits) == bits;
|
||||
@@ -1909,6 +1913,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
|
||||
found->bytes_pinned = 0;
|
||||
found->bytes_reserved = 0;
|
||||
found->bytes_readonly = 0;
|
||||
found->bytes_delalloc = 0;
|
||||
found->full = 0;
|
||||
found->force_alloc = 0;
|
||||
*space_info = found;
|
||||
@@ -1972,6 +1977,196 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
|
||||
return flags;
|
||||
}
|
||||
|
||||
/*
 * Build the block group flags for an allocation made on behalf of @root.
 *
 * A non-zero @data selects the data block group type.  Otherwise the chunk
 * root gets the system type and every other root gets the metadata type.
 * The type bit is OR-ed with the matching configured profile masked by the
 * matching "avail" bits, and the result is passed through
 * btrfs_reduce_alloc_profile() before being returned.
 */
static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 type;
	u64 profile;

	if (data) {
		type = BTRFS_BLOCK_GROUP_DATA;
		profile = fs_info->avail_data_alloc_bits &
			  fs_info->data_alloc_profile;
	} else if (fs_info->chunk_root == root) {
		type = BTRFS_BLOCK_GROUP_SYSTEM;
		profile = fs_info->avail_system_alloc_bits &
			  fs_info->system_alloc_profile;
	} else {
		type = BTRFS_BLOCK_GROUP_METADATA;
		profile = fs_info->avail_metadata_alloc_bits &
			  fs_info->metadata_alloc_profile;
	}

	return btrfs_reduce_alloc_profile(root, type | profile);
}
|
||||
|
||||
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
|
||||
{
|
||||
u64 alloc_target;
|
||||
|
||||
alloc_target = btrfs_get_alloc_profile(root, 1);
|
||||
BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
|
||||
alloc_target);
|
||||
}
|
||||
|
||||
/*
|
||||
* for now this just makes sure we have at least 5% of our metadata space free
|
||||
* for use.
|
||||
*/
|
||||
int btrfs_check_metadata_free_space(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_fs_info *info = root->fs_info;
|
||||
struct btrfs_space_info *meta_sinfo;
|
||||
u64 alloc_target, thresh;
|
||||
|
||||
/* get the space info for where the metadata will live */
|
||||
alloc_target = btrfs_get_alloc_profile(root, 0);
|
||||
meta_sinfo = __find_space_info(info, alloc_target);
|
||||
|
||||
/*
|
||||
* if the metadata area isn't maxed out then there is no sense in
|
||||
* checking how much is used, since we can always allocate a new chunk
|
||||
*/
|
||||
if (!meta_sinfo->full)
|
||||
return 0;
|
||||
|
||||
spin_lock(&meta_sinfo->lock);
|
||||
thresh = meta_sinfo->total_bytes * 95;
|
||||
|
||||
do_div(thresh, 100);
|
||||
|
||||
if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
|
||||
meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
|
||||
spin_unlock(&meta_sinfo->lock);
|
||||
return -ENOSPC;
|
||||
}
|
||||
spin_unlock(&meta_sinfo->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This will check the space that the inode allocates from to make sure we have
|
||||
* enough space for bytes.
|
||||
*/
|
||||
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
|
||||
u64 bytes)
|
||||
{
|
||||
struct btrfs_space_info *data_sinfo;
|
||||
int ret = 0;
|
||||
|
||||
/* make sure bytes are sectorsize aligned */
|
||||
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
|
||||
|
||||
data_sinfo = BTRFS_I(inode)->space_info;
|
||||
again:
|
||||
/* make sure we have enough space to handle the data first */
|
||||
spin_lock(&data_sinfo->lock);
|
||||
if (data_sinfo->total_bytes - data_sinfo->bytes_used -
|
||||
data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
|
||||
data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
|
||||
data_sinfo->bytes_may_use < bytes) {
|
||||
/*
|
||||
* if we don't have enough free bytes in this space then we need
|
||||
* to alloc a new chunk.
|
||||
*/
|
||||
if (!data_sinfo->full) {
|
||||
u64 alloc_target;
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
||||
data_sinfo->force_alloc = 1;
|
||||
spin_unlock(&data_sinfo->lock);
|
||||
|
||||
alloc_target = btrfs_get_alloc_profile(root, 1);
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
if (!trans)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
|
||||
bytes + 2 * 1024 * 1024,
|
||||
alloc_target, 0);
|
||||
btrfs_end_transaction(trans, root);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto again;
|
||||
}
|
||||
spin_unlock(&data_sinfo->lock);
|
||||
printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
|
||||
", %llu bytes_used, %llu bytes_reserved, "
|
||||
"%llu bytes_pinned, %llu bytes_readonly, %llu may use"
|
||||
"%llu total\n", bytes, data_sinfo->bytes_delalloc,
|
||||
data_sinfo->bytes_used, data_sinfo->bytes_reserved,
|
||||
data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
|
||||
data_sinfo->bytes_may_use, data_sinfo->total_bytes);
|
||||
return -ENOSPC;
|
||||
}
|
||||
data_sinfo->bytes_may_use += bytes;
|
||||
BTRFS_I(inode)->reserved_bytes += bytes;
|
||||
spin_unlock(&data_sinfo->lock);
|
||||
|
||||
return btrfs_check_metadata_free_space(root);
|
||||
}
|
||||
|
||||
/*
|
||||
* if there was an error for whatever reason after calling
|
||||
* btrfs_check_data_free_space, call this so we can cleanup the counters.
|
||||
*/
|
||||
void btrfs_free_reserved_data_space(struct btrfs_root *root,
|
||||
struct inode *inode, u64 bytes)
|
||||
{
|
||||
struct btrfs_space_info *data_sinfo;
|
||||
|
||||
/* make sure bytes are sectorsize aligned */
|
||||
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
|
||||
|
||||
data_sinfo = BTRFS_I(inode)->space_info;
|
||||
spin_lock(&data_sinfo->lock);
|
||||
data_sinfo->bytes_may_use -= bytes;
|
||||
BTRFS_I(inode)->reserved_bytes -= bytes;
|
||||
spin_unlock(&data_sinfo->lock);
|
||||
}
|
||||
|
||||
/* called when we are adding a delalloc extent to the inode's io_tree */
|
||||
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
|
||||
u64 bytes)
|
||||
{
|
||||
struct btrfs_space_info *data_sinfo;
|
||||
|
||||
/* get the space info for where this inode will be storing its data */
|
||||
data_sinfo = BTRFS_I(inode)->space_info;
|
||||
|
||||
/* make sure we have enough space to handle the data first */
|
||||
spin_lock(&data_sinfo->lock);
|
||||
data_sinfo->bytes_delalloc += bytes;
|
||||
|
||||
/*
|
||||
* we are adding a delalloc extent without calling
|
||||
* btrfs_check_data_free_space first. This happens on a weird
|
||||
* writepage condition, but shouldn't hurt our accounting
|
||||
*/
|
||||
if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
|
||||
data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
|
||||
BTRFS_I(inode)->reserved_bytes = 0;
|
||||
} else {
|
||||
data_sinfo->bytes_may_use -= bytes;
|
||||
BTRFS_I(inode)->reserved_bytes -= bytes;
|
||||
}
|
||||
|
||||
spin_unlock(&data_sinfo->lock);
|
||||
}
|
||||
|
||||
/* called when we are clearing an delalloc extent from the inode's io_tree */
|
||||
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
|
||||
u64 bytes)
|
||||
{
|
||||
struct btrfs_space_info *info;
|
||||
|
||||
info = BTRFS_I(inode)->space_info;
|
||||
|
||||
spin_lock(&info->lock);
|
||||
info->bytes_delalloc -= bytes;
|
||||
spin_unlock(&info->lock);
|
||||
}
|
||||
|
||||
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *extent_root, u64 alloc_bytes,
|
||||
u64 flags, int force)
|
||||
@@ -3105,6 +3300,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
|
||||
(unsigned long long)(info->total_bytes - info->bytes_used -
|
||||
info->bytes_pinned - info->bytes_reserved),
|
||||
(info->full) ? "" : "not ");
|
||||
printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
|
||||
" may_use=%llu, used=%llu\n", info->total_bytes,
|
||||
info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
|
||||
info->bytes_used);
|
||||
|
||||
down_read(&info->groups_sem);
|
||||
list_for_each_entry(cache, &info->block_groups, list) {
|
||||
@@ -3131,24 +3330,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
|
||||
{
|
||||
int ret;
|
||||
u64 search_start = 0;
|
||||
u64 alloc_profile;
|
||||
struct btrfs_fs_info *info = root->fs_info;
|
||||
|
||||
if (data) {
|
||||
alloc_profile = info->avail_data_alloc_bits &
|
||||
info->data_alloc_profile;
|
||||
data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
|
||||
} else if (root == root->fs_info->chunk_root) {
|
||||
alloc_profile = info->avail_system_alloc_bits &
|
||||
info->system_alloc_profile;
|
||||
data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
|
||||
} else {
|
||||
alloc_profile = info->avail_metadata_alloc_bits &
|
||||
info->metadata_alloc_profile;
|
||||
data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
|
||||
}
|
||||
data = btrfs_get_alloc_profile(root, data);
|
||||
again:
|
||||
data = btrfs_reduce_alloc_profile(root, data);
|
||||
/*
|
||||
* the only place that sets empty_size is btrfs_realloc_node, which
|
||||
* is not called recursively on allocations
|
||||
|
Reference in New Issue
Block a user