md/r5cache: handle alloc_page failure
RMW in the r5c write-back cache uses an extra page to store old data for prexor. handle_stripe_dirtying() allocates this page by calling alloc_page(). However, alloc_page() may fail. To handle alloc_page() failures, this patch adds an extra page to disk_info. When alloc_page() fails, handle_stripe() tries to use these pages. When these pages are already in use by another stripe (R5C_EXTRA_PAGE_IN_USE), the stripe is added to delayed_list. Signed-off-by: Song Liu <songliubraving@fb.com> Reviewed-by: NeilBrown <neilb@suse.com> Signed-off-by: Shaohua Li <shli@fb.com>
This commit is contained in:
@@ -2326,15 +2326,40 @@ int r5c_try_caching_write(struct r5conf *conf,
|
|||||||
*/
|
*/
|
||||||
void r5c_release_extra_page(struct stripe_head *sh)
|
void r5c_release_extra_page(struct stripe_head *sh)
|
||||||
{
|
{
|
||||||
|
struct r5conf *conf = sh->raid_conf;
|
||||||
int i;
|
int i;
|
||||||
|
bool using_disk_info_extra_page;
|
||||||
|
|
||||||
|
using_disk_info_extra_page =
|
||||||
|
sh->dev[0].orig_page == conf->disks[0].extra_page;
|
||||||
|
|
||||||
for (i = sh->disks; i--; )
|
for (i = sh->disks; i--; )
|
||||||
if (sh->dev[i].page != sh->dev[i].orig_page) {
|
if (sh->dev[i].page != sh->dev[i].orig_page) {
|
||||||
struct page *p = sh->dev[i].orig_page;
|
struct page *p = sh->dev[i].orig_page;
|
||||||
|
|
||||||
sh->dev[i].orig_page = sh->dev[i].page;
|
sh->dev[i].orig_page = sh->dev[i].page;
|
||||||
put_page(p);
|
if (!using_disk_info_extra_page)
|
||||||
|
put_page(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (using_disk_info_extra_page) {
|
||||||
|
clear_bit(R5C_EXTRA_PAGE_IN_USE, &conf->cache_state);
|
||||||
|
md_wakeup_thread(conf->mddev->thread);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void r5c_use_extra_page(struct stripe_head *sh)
|
||||||
|
{
|
||||||
|
struct r5conf *conf = sh->raid_conf;
|
||||||
|
int i;
|
||||||
|
struct r5dev *dev;
|
||||||
|
|
||||||
|
for (i = sh->disks; i--; ) {
|
||||||
|
dev = &sh->dev[i];
|
||||||
|
if (dev->orig_page != dev->page)
|
||||||
|
put_page(dev->orig_page);
|
||||||
|
dev->orig_page = conf->disks[i].extra_page;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@@ -876,6 +876,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
|||||||
|
|
||||||
if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) {
|
if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) {
|
||||||
/* writing out phase */
|
/* writing out phase */
|
||||||
|
if (s->waiting_extra_page)
|
||||||
|
return;
|
||||||
if (r5l_write_stripe(conf->log, sh) == 0)
|
if (r5l_write_stripe(conf->log, sh) == 0)
|
||||||
return;
|
return;
|
||||||
} else { /* caching phase */
|
} else { /* caching phase */
|
||||||
@@ -2007,6 +2009,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
|
|||||||
INIT_LIST_HEAD(&sh->batch_list);
|
INIT_LIST_HEAD(&sh->batch_list);
|
||||||
INIT_LIST_HEAD(&sh->lru);
|
INIT_LIST_HEAD(&sh->lru);
|
||||||
INIT_LIST_HEAD(&sh->r5c);
|
INIT_LIST_HEAD(&sh->r5c);
|
||||||
|
INIT_LIST_HEAD(&sh->log_list);
|
||||||
atomic_set(&sh->count, 1);
|
atomic_set(&sh->count, 1);
|
||||||
sh->log_start = MaxSector;
|
sh->log_start = MaxSector;
|
||||||
for (i = 0; i < disks; i++) {
|
for (i = 0; i < disks; i++) {
|
||||||
@@ -2253,10 +2256,24 @@ static int resize_stripes(struct r5conf *conf, int newsize)
|
|||||||
*/
|
*/
|
||||||
ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
|
ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
|
||||||
if (ndisks) {
|
if (ndisks) {
|
||||||
for (i=0; i<conf->raid_disks; i++)
|
for (i = 0; i < conf->pool_size; i++)
|
||||||
ndisks[i] = conf->disks[i];
|
ndisks[i] = conf->disks[i];
|
||||||
kfree(conf->disks);
|
|
||||||
conf->disks = ndisks;
|
for (i = conf->pool_size; i < newsize; i++) {
|
||||||
|
ndisks[i].extra_page = alloc_page(GFP_NOIO);
|
||||||
|
if (!ndisks[i].extra_page)
|
||||||
|
err = -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (err) {
|
||||||
|
for (i = conf->pool_size; i < newsize; i++)
|
||||||
|
if (ndisks[i].extra_page)
|
||||||
|
put_page(ndisks[i].extra_page);
|
||||||
|
kfree(ndisks);
|
||||||
|
} else {
|
||||||
|
kfree(conf->disks);
|
||||||
|
conf->disks = ndisks;
|
||||||
|
}
|
||||||
} else
|
} else
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
|
|
||||||
@@ -3580,10 +3597,10 @@ unhash:
|
|||||||
break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
|
break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void handle_stripe_dirtying(struct r5conf *conf,
|
static int handle_stripe_dirtying(struct r5conf *conf,
|
||||||
struct stripe_head *sh,
|
struct stripe_head *sh,
|
||||||
struct stripe_head_state *s,
|
struct stripe_head_state *s,
|
||||||
int disks)
|
int disks)
|
||||||
{
|
{
|
||||||
int rmw = 0, rcw = 0, i;
|
int rmw = 0, rcw = 0, i;
|
||||||
sector_t recovery_cp = conf->mddev->recovery_cp;
|
sector_t recovery_cp = conf->mddev->recovery_cp;
|
||||||
@@ -3649,12 +3666,32 @@ static void handle_stripe_dirtying(struct r5conf *conf,
|
|||||||
dev->page == dev->orig_page &&
|
dev->page == dev->orig_page &&
|
||||||
!test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) {
|
!test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) {
|
||||||
/* alloc page for prexor */
|
/* alloc page for prexor */
|
||||||
dev->orig_page = alloc_page(GFP_NOIO);
|
struct page *p = alloc_page(GFP_NOIO);
|
||||||
|
|
||||||
/* will handle failure in a later patch*/
|
if (p) {
|
||||||
BUG_ON(!dev->orig_page);
|
dev->orig_page = p;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* alloc_page() failed, try use
|
||||||
|
* disk_info->extra_page
|
||||||
|
*/
|
||||||
|
if (!test_and_set_bit(R5C_EXTRA_PAGE_IN_USE,
|
||||||
|
&conf->cache_state)) {
|
||||||
|
r5c_use_extra_page(sh);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* extra_page in use, add to delayed_list */
|
||||||
|
set_bit(STRIPE_DELAYED, &sh->state);
|
||||||
|
s->waiting_extra_page = 1;
|
||||||
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = disks; i--; ) {
|
||||||
|
struct r5dev *dev = &sh->dev[i];
|
||||||
if ((dev->towrite ||
|
if ((dev->towrite ||
|
||||||
i == sh->pd_idx || i == sh->qd_idx ||
|
i == sh->pd_idx || i == sh->qd_idx ||
|
||||||
test_bit(R5_InJournal, &dev->flags)) &&
|
test_bit(R5_InJournal, &dev->flags)) &&
|
||||||
@@ -3730,6 +3767,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
|
|||||||
(s->locked == 0 && (rcw == 0 || rmw == 0) &&
|
(s->locked == 0 && (rcw == 0 || rmw == 0) &&
|
||||||
!test_bit(STRIPE_BIT_DELAY, &sh->state)))
|
!test_bit(STRIPE_BIT_DELAY, &sh->state)))
|
||||||
schedule_reconstruction(sh, s, rcw == 0, 0);
|
schedule_reconstruction(sh, s, rcw == 0, 0);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
|
static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
|
||||||
@@ -4545,8 +4583,12 @@ static void handle_stripe(struct stripe_head *sh)
|
|||||||
if (ret == -EAGAIN ||
|
if (ret == -EAGAIN ||
|
||||||
/* stripe under reclaim: !caching && injournal */
|
/* stripe under reclaim: !caching && injournal */
|
||||||
(!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
|
(!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
|
||||||
s.injournal > 0))
|
s.injournal > 0)) {
|
||||||
handle_stripe_dirtying(conf, sh, &s, disks);
|
ret = handle_stripe_dirtying(conf, sh, &s,
|
||||||
|
disks);
|
||||||
|
if (ret == -EAGAIN)
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -6458,6 +6500,8 @@ static void raid5_free_percpu(struct r5conf *conf)
|
|||||||
|
|
||||||
static void free_conf(struct r5conf *conf)
|
static void free_conf(struct r5conf *conf)
|
||||||
{
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
if (conf->log)
|
if (conf->log)
|
||||||
r5l_exit_log(conf->log);
|
r5l_exit_log(conf->log);
|
||||||
if (conf->shrinker.nr_deferred)
|
if (conf->shrinker.nr_deferred)
|
||||||
@@ -6466,6 +6510,9 @@ static void free_conf(struct r5conf *conf)
|
|||||||
free_thread_groups(conf);
|
free_thread_groups(conf);
|
||||||
shrink_stripes(conf);
|
shrink_stripes(conf);
|
||||||
raid5_free_percpu(conf);
|
raid5_free_percpu(conf);
|
||||||
|
for (i = 0; i < conf->pool_size; i++)
|
||||||
|
if (conf->disks[i].extra_page)
|
||||||
|
put_page(conf->disks[i].extra_page);
|
||||||
kfree(conf->disks);
|
kfree(conf->disks);
|
||||||
kfree(conf->stripe_hashtbl);
|
kfree(conf->stripe_hashtbl);
|
||||||
kfree(conf);
|
kfree(conf);
|
||||||
@@ -6612,9 +6659,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
|||||||
|
|
||||||
conf->disks = kzalloc(max_disks * sizeof(struct disk_info),
|
conf->disks = kzalloc(max_disks * sizeof(struct disk_info),
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
|
|
||||||
if (!conf->disks)
|
if (!conf->disks)
|
||||||
goto abort;
|
goto abort;
|
||||||
|
|
||||||
|
for (i = 0; i < max_disks; i++) {
|
||||||
|
conf->disks[i].extra_page = alloc_page(GFP_KERNEL);
|
||||||
|
if (!conf->disks[i].extra_page)
|
||||||
|
goto abort;
|
||||||
|
}
|
||||||
|
|
||||||
conf->mddev = mddev;
|
conf->mddev = mddev;
|
||||||
|
|
||||||
if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
|
if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
|
||||||
|
@@ -276,6 +276,7 @@ struct stripe_head_state {
|
|||||||
struct md_rdev *blocked_rdev;
|
struct md_rdev *blocked_rdev;
|
||||||
int handle_bad_blocks;
|
int handle_bad_blocks;
|
||||||
int log_failed;
|
int log_failed;
|
||||||
|
int waiting_extra_page;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Flags for struct r5dev.flags */
|
/* Flags for struct r5dev.flags */
|
||||||
@@ -439,6 +440,7 @@ enum {
|
|||||||
|
|
||||||
struct disk_info {
|
struct disk_info {
|
||||||
struct md_rdev *rdev, *replacement;
|
struct md_rdev *rdev, *replacement;
|
||||||
|
struct page *extra_page; /* extra page to use in prexor */
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -559,6 +561,9 @@ enum r5_cache_state {
|
|||||||
* only process stripes that are already
|
* only process stripes that are already
|
||||||
* occupying the log
|
* occupying the log
|
||||||
*/
|
*/
|
||||||
|
R5C_EXTRA_PAGE_IN_USE, /* a stripe is using disk_info.extra_page
|
||||||
|
* for prexor
|
||||||
|
*/
|
||||||
};
|
};
|
||||||
|
|
||||||
struct r5conf {
|
struct r5conf {
|
||||||
@@ -765,6 +770,7 @@ extern void
|
|||||||
r5c_finish_stripe_write_out(struct r5conf *conf, struct stripe_head *sh,
|
r5c_finish_stripe_write_out(struct r5conf *conf, struct stripe_head *sh,
|
||||||
struct stripe_head_state *s);
|
struct stripe_head_state *s);
|
||||||
extern void r5c_release_extra_page(struct stripe_head *sh);
|
extern void r5c_release_extra_page(struct stripe_head *sh);
|
||||||
|
extern void r5c_use_extra_page(struct stripe_head *sh);
|
||||||
extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
|
extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
|
||||||
extern void r5c_handle_cached_data_endio(struct r5conf *conf,
|
extern void r5c_handle_cached_data_endio(struct r5conf *conf,
|
||||||
struct stripe_head *sh, int disks, struct bio_list *return_bi);
|
struct stripe_head *sh, int disks, struct bio_list *return_bi);
|
||||||
|
Reference in New Issue
Block a user