@@ -292,9 +292,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
 	BUG_ON(atomic_read(&conf->active_stripes)==0);
 	if (test_bit(STRIPE_HANDLE, &sh->state)) {
 		if (test_bit(STRIPE_DELAYED, &sh->state) &&
-		    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+		    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 			list_add_tail(&sh->lru, &conf->delayed_list);
-		else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+			if (atomic_read(&conf->preread_active_stripes)
+			    < IO_THRESHOLD)
+				md_wakeup_thread(conf->mddev->thread);
+		} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
 			   sh->bm_seq - conf->seq_write > 0)
 			list_add_tail(&sh->lru, &conf->bitmap_list);
 		else {
@@ -413,6 +416,11 @@ static void release_stripe(struct stripe_head *sh)
 	int hash;
 	bool wakeup;
 
+	/* Avoid release_list until the last reference.
+	 */
+	if (atomic_add_unless(&sh->count, -1, 1))
+		return;
+
 	if (unlikely(!conf->mddev->thread) ||
 	    test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
 		goto slow_path;
@@ -479,6 +487,7 @@ static void shrink_buffers(struct stripe_head *sh)
 	int num = sh->raid_conf->pool_size;
 
 	for (i = 0; i < num ; i++) {
+		WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
 		p = sh->dev[i].page;
 		if (!p)
 			continue;
@@ -499,6 +508,7 @@ static int grow_buffers(struct stripe_head *sh)
 			return 1;
 		}
 		sh->dev[i].page = page;
+		sh->dev[i].orig_page = page;
 	}
 	return 0;
 }
@@ -855,6 +865,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
 				bi->bi_rw |= REQ_NOMERGE;
 
+			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].vec.bv_page = sh->dev[i].page;
 			bi->bi_vcnt = 1;
 			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			bi->bi_io_vec[0].bv_offset = 0;
@@ -899,6 +912,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			else
 				rbi->bi_iter.bi_sector = (sh->sector
 						  + rrdev->data_offset);
+			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].rvec.bv_page = sh->dev[i].page;
 			rbi->bi_vcnt = 1;
 			rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			rbi->bi_io_vec[0].bv_offset = 0;
@@ -927,8 +943,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 }
 
 static struct dma_async_tx_descriptor *
-async_copy_data(int frombio, struct bio *bio, struct page *page,
-	sector_t sector, struct dma_async_tx_descriptor *tx)
+async_copy_data(int frombio, struct bio *bio, struct page **page,
+	sector_t sector, struct dma_async_tx_descriptor *tx,
+	struct stripe_head *sh)
 {
 	struct bio_vec bvl;
 	struct bvec_iter iter;
@@ -965,11 +982,16 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
 		if (clen > 0) {
 			b_offset += bvl.bv_offset;
 			bio_page = bvl.bv_page;
-			if (frombio)
-				tx = async_memcpy(page, bio_page, page_offset,
+			if (frombio) {
+				if (sh->raid_conf->skip_copy &&
+				    b_offset == 0 && page_offset == 0 &&
+				    clen == STRIPE_SIZE)
+					*page = bio_page;
+				else
+					tx = async_memcpy(*page, bio_page, page_offset,
 						  b_offset, clen, &submit);
-			else
-				tx = async_memcpy(bio_page, page, b_offset,
+			} else
+				tx = async_memcpy(bio_page, *page, b_offset,
 						  page_offset, clen, &submit);
 		}
 		/* chain the operations */
@@ -1045,8 +1067,8 @@ static void ops_run_biofill(struct stripe_head *sh)
 			spin_unlock_irq(&sh->stripe_lock);
 			while (rbi && rbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
-				tx = async_copy_data(0, rbi, dev->page,
-					dev->sector, tx);
+				tx = async_copy_data(0, rbi, &dev->page,
+					dev->sector, tx, sh);
 				rbi = r5_next_bio(rbi, dev->sector);
 			}
 		}
@@ -1384,6 +1406,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 			BUG_ON(dev->written);
 			wbi = dev->written = chosen;
 			spin_unlock_irq(&sh->stripe_lock);
+			WARN_ON(dev->page != dev->orig_page);
 
 			while (wbi && wbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
@@ -1393,9 +1416,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 					set_bit(R5_SyncIO, &dev->flags);
 				if (wbi->bi_rw & REQ_DISCARD)
 					set_bit(R5_Discard, &dev->flags);
-				else
-					tx = async_copy_data(1, wbi, dev->page,
-						dev->sector, tx);
+				else {
+					tx = async_copy_data(1, wbi, &dev->page,
+						dev->sector, tx, sh);
+					if (dev->page != dev->orig_page) {
+						set_bit(R5_SkipCopy, &dev->flags);
+						clear_bit(R5_UPTODATE, &dev->flags);
+						clear_bit(R5_OVERWRITE, &dev->flags);
+					}
+				}
 				wbi = r5_next_bio(wbi, dev->sector);
 			}
 		}
@@ -1426,7 +1455,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
 		struct r5dev *dev = &sh->dev[i];
 
 		if (dev->written || i == pd_idx || i == qd_idx) {
-			if (!discard)
+			if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
 				set_bit(R5_UPTODATE, &dev->flags);
 			if (fua)
 				set_bit(R5_WantFUA, &dev->flags);
@@ -1839,8 +1868,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 		osh = get_free_stripe(conf, hash);
 		unlock_device_hash_lock(conf, hash);
 		atomic_set(&nsh->count, 1);
-		for(i=0; i<conf->pool_size; i++)
+		for(i=0; i<conf->pool_size; i++) {
 			nsh->dev[i].page = osh->dev[i].page;
+			nsh->dev[i].orig_page = osh->dev[i].page;
+		}
 		for( ; i<newsize; i++)
 			nsh->dev[i].page = NULL;
 		nsh->hash_lock_index = hash;
@@ -1896,6 +1927,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 			if (nsh->dev[i].page == NULL) {
 				struct page *p = alloc_page(GFP_NOIO);
 				nsh->dev[i].page = p;
+				nsh->dev[i].orig_page = p;
 				if (!p)
 					err = -ENOMEM;
 			}
@@ -2133,24 +2165,20 @@ static void raid5_end_write_request(struct bio *bi, int error)
 }
 
 static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
-	
+
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
 	struct r5dev *dev = &sh->dev[i];
 
 	bio_init(&dev->req);
 	dev->req.bi_io_vec = &dev->vec;
-	dev->req.bi_vcnt++;
-	dev->req.bi_max_vecs++;
+	dev->req.bi_max_vecs = 1;
 	dev->req.bi_private = sh;
-	dev->vec.bv_page = dev->page;
 
 	bio_init(&dev->rreq);
 	dev->rreq.bi_io_vec = &dev->rvec;
-	dev->rreq.bi_vcnt++;
-	dev->rreq.bi_max_vecs++;
+	dev->rreq.bi_max_vecs = 1;
 	dev->rreq.bi_private = sh;
-	dev->rvec.bv_page = dev->page;
 
 	dev->flags = 0;
 	dev->sector = compute_blocknr(sh, i, previous);
@@ -2750,6 +2778,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		/* and fail all 'written' */
 		bi = sh->dev[i].written;
 		sh->dev[i].written = NULL;
+		if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
+			WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].page = sh->dev[i].orig_page;
+		}
+
 		if (bi) bitmap_end = 1;
 		while (bi && bi->bi_iter.bi_sector <
 		       sh->dev[i].sector + STRIPE_SECTORS) {
@@ -2886,8 +2919,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
 	     (s->failed >= 1 && fdev[0]->toread) ||
 	     (s->failed >= 2 && fdev[1]->toread) ||
 	     (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
+	      (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
 	      !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
-	     (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
+	     (sh->raid_conf->level == 6 && s->failed && s->to_write &&
+	      s->to_write < sh->raid_conf->raid_disks - 2 &&
+	      (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
 		/* we would like to get this block, possibly by computing it,
 		 * otherwise read it if the backing disk is insync
 		 */
@@ -2991,12 +3027,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 			dev = &sh->dev[i];
 			if (!test_bit(R5_LOCKED, &dev->flags) &&
 			    (test_bit(R5_UPTODATE, &dev->flags) ||
-			     test_bit(R5_Discard, &dev->flags))) {
+			     test_bit(R5_Discard, &dev->flags) ||
+			     test_bit(R5_SkipCopy, &dev->flags))) {
 				/* We can return any write requests */
 				struct bio *wbi, *wbi2;
 				pr_debug("Return write for disc %d\n", i);
 				if (test_and_clear_bit(R5_Discard, &dev->flags))
 					clear_bit(R5_UPTODATE, &dev->flags);
+				if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
+					WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
+					dev->page = dev->orig_page;
+				}
 				wbi = dev->written;
 				dev->written = NULL;
 				while (wbi && wbi->bi_iter.bi_sector <
@@ -3015,6 +3056,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 						0);
 			} else if (test_bit(R5_Discard, &dev->flags))
 				discard_pending = 1;
+			WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
+			WARN_ON(dev->page != dev->orig_page);
 		}
 	if (!discard_pending &&
 	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
@@ -3086,7 +3129,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 		    !test_bit(R5_LOCKED, &dev->flags) &&
 		    !(test_bit(R5_UPTODATE, &dev->flags) ||
 		      test_bit(R5_Wantcompute, &dev->flags))) {
-			if (test_bit(R5_Insync, &dev->flags)) rcw++;
+			if (test_bit(R5_Insync, &dev->flags))
+				rcw++;
 			else
 				rcw += 2*disks;
 		}
@@ -3107,10 +3151,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 			    !(test_bit(R5_UPTODATE, &dev->flags) ||
 			    test_bit(R5_Wantcompute, &dev->flags)) &&
 			    test_bit(R5_Insync, &dev->flags)) {
-				if (
-				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-					pr_debug("Read_old block "
-						 "%d for r-m-w\n", i);
+				if (test_bit(STRIPE_PREREAD_ACTIVE,
+					     &sh->state)) {
+					pr_debug("Read_old block %d for r-m-w\n",
+						 i);
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
 					s->locked++;
@@ -3133,10 +3177,9 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 			    !(test_bit(R5_UPTODATE, &dev->flags) ||
 			      test_bit(R5_Wantcompute, &dev->flags))) {
 				rcw++;
-				if (!test_bit(R5_Insync, &dev->flags))
-					continue; /* it's a failed drive */
-				if (
-				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+				if (test_bit(R5_Insync, &dev->flags) &&
+				    test_bit(STRIPE_PREREAD_ACTIVE,
+					     &sh->state)) {
 					pr_debug("Read_old block "
 						 "%d for Reconstruct\n", i);
 					set_bit(R5_LOCKED, &dev->flags);
@@ -5031,8 +5074,8 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
 	bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
 
 	set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
+	set_bit(STRIPE_HANDLE, &sh->state);
 
-	handle_stripe(sh);
 	release_stripe(sh);
 
 	return STRIPE_SECTORS;
@@ -5072,7 +5115,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 			/* already done this stripe */
 			continue;
 
-		sh = get_active_stripe(conf, sector, 0, 1, 0);
+		sh = get_active_stripe(conf, sector, 0, 1, 1);
 
 		if (!sh) {
 			/* failed to get a stripe - must wait */
@@ -5354,6 +5397,50 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
 					raid5_show_preread_threshold,
 					raid5_store_preread_threshold);
 
+static ssize_t
+raid5_show_skip_copy(struct mddev *mddev, char *page)
+{
+	struct r5conf *conf = mddev->private;
+	if (conf)
+		return sprintf(page, "%d\n", conf->skip_copy);
+	else
+		return 0;
+}
+
+static ssize_t
+raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
+{
+	struct r5conf *conf = mddev->private;
+	unsigned long new;
+	if (len >= PAGE_SIZE)
+		return -EINVAL;
+	if (!conf)
+		return -ENODEV;
+
+	if (kstrtoul(page, 10, &new))
+		return -EINVAL;
+	new = !!new;
+	if (new == conf->skip_copy)
+		return len;
+
+	mddev_suspend(mddev);
+	conf->skip_copy = new;
+	if (new)
+		mddev->queue->backing_dev_info.capabilities |=
+						BDI_CAP_STABLE_WRITES;
+	else
+		mddev->queue->backing_dev_info.capabilities &=
+						~BDI_CAP_STABLE_WRITES;
+	mddev_resume(mddev);
+	return len;
+}
+
+static struct md_sysfs_entry
+raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR,
+					raid5_show_skip_copy,
+					raid5_store_skip_copy);
+
+
 static ssize_t
 stripe_cache_active_show(struct mddev *mddev, char *page)
 {
@@ -5439,6 +5526,7 @@ static struct attribute *raid5_attrs[] = {
 	&raid5_stripecache_active.attr,
 	&raid5_preread_bypass_threshold.attr,
 	&raid5_group_thread_cnt.attr,
+	&raid5_skip_copy.attr,
 	NULL,
 };
 static struct attribute_group raid5_attrs_group = {