ext4: make sure allocate pending entry not fail
[ Upstream commit 8e387c89e96b9543a339f84043cf9df15fed2632 ]

__insert_pending() allocates memory in atomic context, so the allocation
could fail, but we are not handling that failure now. This could lead
ext4_es_remove_extent() to get the wrong number of reserved clusters, and
the global data block reservation count would then be incorrect. As with
the extents_status entry preallocation, preallocate the pending entry
outside of i_es_lock with __GFP_NOFAIL, so that __insert_pending() and
__revise_pending() always succeed.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20230824092619.1327976-3-yi.zhang@huaweicloud.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Committed by: Greg Kroah-Hartman
Parent: 10341e77e4
Commit: ec75d1d0cd
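
Before the diff, here is a minimal user-space sketch of the pattern the patch applies. It is not the kernel code: the names insert_pending()/insert_delayed() and the malloc/pthread plumbing are illustrative stand-ins. The idea is to preallocate the pending entry outside the lock, where a no-fail allocation is permissible, consume it inside the critical section, free it if it ends up unused, and retry the whole operation if an allocation was needed but not available.

/*
 * Simplified user-space sketch (assumed names, not the kernel API): any
 * allocation that may block is done outside the lock, the critical
 * section only consumes the preallocated object, and the operation is
 * retried if an allocation turned out to be needed but was missing.
 */
#include <pthread.h>
#include <stdlib.h>
#include <errno.h>

struct pending_reservation { long lclu; };

static pthread_mutex_t i_es_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for __insert_pending(): consumes *prealloc if one is supplied,
 * otherwise falls back to a "cheap" allocation that is allowed to fail. */
static int insert_pending(long lclu, struct pending_reservation **prealloc)
{
	struct pending_reservation *pr;

	if (*prealloc) {
		pr = *prealloc;			/* cannot fail */
		*prealloc = NULL;
	} else {
		pr = malloc(sizeof(*pr));	/* may fail, like GFP_ATOMIC */
		if (!pr)
			return -ENOMEM;
	}
	pr->lclu = lclu;
	/* ... link pr into the pending tree here ... */
	return 0;
}

int insert_delayed(long lclu, int allocated)
{
	struct pending_reservation *pr = NULL;
	int err = 0;

retry:
	/* Preallocate outside the lock only after a failed first pass,
	 * mirroring the (err1 || err2 || err3) && allocated && !pr test. */
	if (err && allocated && !pr)
		while (!(pr = calloc(1, sizeof(*pr))))
			;			/* crude stand-in for __GFP_NOFAIL */

	pthread_mutex_lock(&i_es_lock);
	err = allocated ? insert_pending(lclu, &pr) : 0;
	pthread_mutex_unlock(&i_es_lock);

	if (err)
		goto retry;	/* second pass cannot fail: pr is preallocated */

	free(pr);		/* preallocation not consumed, drop it */
	return 0;
}
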
@@ -152,8 +152,9 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
 static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
 		       struct ext4_inode_info *locked_ei);
-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
-			     ext4_lblk_t len);
+static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
+			    ext4_lblk_t len,
+			    struct pending_reservation **prealloc);
 
 int __init ext4_init_es(void)
 {
@@ -450,6 +451,19 @@ static void ext4_es_list_del(struct inode *inode)
 	spin_unlock(&sbi->s_es_lock);
 }
 
+static inline struct pending_reservation *__alloc_pending(bool nofail)
+{
+	if (!nofail)
+		return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
+
+	return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL);
+}
+
+static inline void __free_pending(struct pending_reservation *pr)
+{
+	kmem_cache_free(ext4_pending_cachep, pr);
+}
+
 /*
  * Returns true if we cannot fail to allocate memory for this extent_status
  * entry and cannot reclaim it until its status changes.
@@ -841,11 +855,12 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 {
 	struct extent_status newes;
 	ext4_lblk_t end = lblk + len - 1;
-	int err1 = 0;
-	int err2 = 0;
+	int err1 = 0, err2 = 0, err3 = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct extent_status *es1 = NULL;
 	struct extent_status *es2 = NULL;
+	struct pending_reservation *pr = NULL;
+	bool revise_pending = false;
 
 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
 		return 0;
@@ -873,11 +888,17 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 
 	ext4_es_insert_extent_check(inode, &newes);
 
+	revise_pending = sbi->s_cluster_ratio > 1 &&
+			 test_opt(inode->i_sb, DELALLOC) &&
+			 (status & (EXTENT_STATUS_WRITTEN |
+				    EXTENT_STATUS_UNWRITTEN));
 retry:
 	if (err1 && !es1)
 		es1 = __es_alloc_extent(true);
 	if ((err1 || err2) && !es2)
 		es2 = __es_alloc_extent(true);
+	if ((err1 || err2 || err3) && revise_pending && !pr)
+		pr = __alloc_pending(true);
 	write_lock(&EXT4_I(inode)->i_es_lock);
 
 	err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
@@ -902,13 +923,18 @@ retry:
 		es2 = NULL;
 	}
 
-	if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
-	    (status & EXTENT_STATUS_WRITTEN ||
-	     status & EXTENT_STATUS_UNWRITTEN))
-		__revise_pending(inode, lblk, len);
+	if (revise_pending) {
+		err3 = __revise_pending(inode, lblk, len, &pr);
+		if (err3 != 0)
+			goto error;
+		if (pr) {
+			__free_pending(pr);
+			pr = NULL;
+		}
+	}
 error:
 	write_unlock(&EXT4_I(inode)->i_es_lock);
-	if (err1 || err2)
+	if (err1 || err2 || err3)
 		goto retry;
 
 	ext4_es_print_tree(inode);
@@ -1316,7 +1342,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
 				rc->ndelonly--;
 				node = rb_next(&pr->rb_node);
 				rb_erase(&pr->rb_node, &tree->root);
-				kmem_cache_free(ext4_pending_cachep, pr);
+				__free_pending(pr);
 				if (!node)
 					break;
 				pr = rb_entry(node, struct pending_reservation,
@@ -1913,11 +1939,13 @@ static struct pending_reservation *__get_pending(struct inode *inode,
  *
  * @inode - file containing the cluster
  * @lblk - logical block in the cluster to be added
+ * @prealloc - preallocated pending entry
  *
  * Returns 0 on successful insertion and -ENOMEM on failure.  If the
  * pending reservation is already in the set, returns successfully.
  */
-static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
+static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
+			    struct pending_reservation **prealloc)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
@@ -1943,11 +1971,16 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
 		}
 	}
 
-	pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
-	if (pr == NULL) {
-		ret = -ENOMEM;
-		goto out;
+	if (likely(*prealloc == NULL)) {
+		pr = __alloc_pending(false);
+		if (!pr) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	} else {
+		pr = *prealloc;
+		*prealloc = NULL;
 	}
 	pr->lclu = lclu;
 
 	rb_link_node(&pr->rb_node, parent, p);
@@ -1976,7 +2009,7 @@ static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
 	if (pr != NULL) {
 		tree = &EXT4_I(inode)->i_pending_tree;
 		rb_erase(&pr->rb_node, &tree->root);
-		kmem_cache_free(ext4_pending_cachep, pr);
+		__free_pending(pr);
 	}
 }
 
@@ -2037,10 +2070,10 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
 			     bool allocated)
 {
 	struct extent_status newes;
-	int err1 = 0;
-	int err2 = 0;
+	int err1 = 0, err2 = 0, err3 = 0;
 	struct extent_status *es1 = NULL;
 	struct extent_status *es2 = NULL;
+	struct pending_reservation *pr = NULL;
 
 	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
 		return 0;
@@ -2060,6 +2093,8 @@ retry:
 		es1 = __es_alloc_extent(true);
 	if ((err1 || err2) && !es2)
 		es2 = __es_alloc_extent(true);
+	if ((err1 || err2 || err3) && allocated && !pr)
+		pr = __alloc_pending(true);
 	write_lock(&EXT4_I(inode)->i_es_lock);
 
 	err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
@@ -2082,11 +2117,18 @@ retry:
 		es2 = NULL;
 	}
 
-	if (allocated)
-		__insert_pending(inode, lblk);
+	if (allocated) {
+		err3 = __insert_pending(inode, lblk, &pr);
+		if (err3 != 0)
+			goto error;
+		if (pr) {
+			__free_pending(pr);
+			pr = NULL;
+		}
+	}
 error:
 	write_unlock(&EXT4_I(inode)->i_es_lock);
-	if (err1 || err2)
+	if (err1 || err2 || err3)
 		goto retry;
 
 	ext4_es_print_tree(inode);
@@ -2192,21 +2234,24 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
  * @inode - file containing the range
  * @lblk - logical block defining the start of range
  * @len - length of range in blocks
+ * @prealloc - preallocated pending entry
  *
 * Used after a newly allocated extent is added to the extents status tree.
 * Requires that the extents in the range have either written or unwritten
 * status.  Must be called while holding i_es_lock.
 */
-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
-			     ext4_lblk_t len)
+static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
+			    ext4_lblk_t len,
+			    struct pending_reservation **prealloc)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_lblk_t end = lblk + len - 1;
 	ext4_lblk_t first, last;
 	bool f_del = false, l_del = false;
+	int ret = 0;
 
 	if (len == 0)
-		return;
+		return 0;
 
 	/*
 	 * Two cases - block range within single cluster and block range
@@ -2227,7 +2272,9 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
 			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
 						first, lblk - 1);
 			if (f_del) {
-				__insert_pending(inode, first);
+				ret = __insert_pending(inode, first, prealloc);
+				if (ret < 0)
+					goto out;
 			} else {
 				last = EXT4_LBLK_CMASK(sbi, end) +
 				       sbi->s_cluster_ratio - 1;
@@ -2235,9 +2282,11 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
 				l_del = __es_scan_range(inode,
 							&ext4_es_is_delonly,
 							end + 1, last);
-				if (l_del)
-					__insert_pending(inode, last);
-				else
+				if (l_del) {
+					ret = __insert_pending(inode, last, prealloc);
+					if (ret < 0)
+						goto out;
+				} else
 					__remove_pending(inode, last);
 			}
 		} else {
@@ -2245,18 +2294,24 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
 		if (first != lblk)
 			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
 						first, lblk - 1);
-		if (f_del)
-			__insert_pending(inode, first);
-		else
+		if (f_del) {
+			ret = __insert_pending(inode, first, prealloc);
+			if (ret < 0)
+				goto out;
+		} else
 			__remove_pending(inode, first);
 
 		last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
 		if (last != end)
 			l_del = __es_scan_range(inode, &ext4_es_is_delonly,
 						end + 1, last);
-		if (l_del)
-			__insert_pending(inode, last);
-		else
+		if (l_del) {
+			ret = __insert_pending(inode, last, prealloc);
+			if (ret < 0)
+				goto out;
+		} else
 			__remove_pending(inode, last);
 	}
+out:
+	return ret;
 }