Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull md updates from Shaohua Li:
 "Mainly fixes bugs and improves performance:

   - Improve scalability for raid1 from Coly

   - Improve raid5-cache read performance, disk efficiency and IO
     pattern from Song and me

   - Fix a race condition of disk hotplug for linear from Coly

   - A few cleanup patches from Ming and Byungchul

   - Fix a memory leak from Neil

   - Fix WRITE SAME IO failure from me

   - Add doc for raid5-cache from me"

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: (23 commits)
  md/raid1: fix write behind issues introduced by bio_clone_bioset_partial
  md/raid1: handle flush request correctly
  md/linear: shutup lockdep warnning
  md/raid1: fix a use-after-free bug
  RAID1: avoid unnecessary spin locks in I/O barrier code
  RAID1: a new I/O barrier implementation to remove resync window
  md/raid5: Don't reinvent the wheel but use existing llist API
  md: fast clone bio in bio_clone_mddev()
  md: remove unnecessary check on mddev
  md/raid1: use bio_clone_bioset_partial() in case of write behind
  md: fail if mddev->bio_set can't be created
  block: introduce bio_clone_bioset_partial()
  md: disable WRITE SAME if it fails in underlayer disks
  md/raid5-cache: exclude reclaiming stripes in reclaim check
  md/raid5-cache: stripe reclaim only counts valid stripes
  MD: add doc for raid5-cache
  Documentation: move MD related doc into a separate dir
  md: ensure md devices are freed before module is unloaded.
  md/r5cache: improve journal device efficiency
  md/r5cache: enable chunk_aligned_read with write back cache
  ...
This commit is contained in:
Linus Torvalds
2017-02-24 14:42:19 -08:00
20 changed files with 941 additions and 351 deletions

View File

@@ -53,18 +53,26 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
return conf->disks + lo;
}
/*
* In linear_congested() conf->raid_disks is used as a copy of
* mddev->raid_disks to iterate conf->disks[], because conf->raid_disks
* and conf->disks[] are created in linear_conf(), they are always
* consitent with each other, but mddev->raid_disks does not.
*/
static int linear_congested(struct mddev *mddev, int bits)
{
struct linear_conf *conf;
int i, ret = 0;
conf = mddev->private;
rcu_read_lock();
conf = rcu_dereference(mddev->private);
for (i = 0; i < mddev->raid_disks && !ret ; i++) {
for (i = 0; i < conf->raid_disks && !ret ; i++) {
struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
ret |= bdi_congested(q->backing_dev_info, bits);
}
rcu_read_unlock();
return ret;
}
@@ -144,6 +152,19 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
conf->disks[i-1].end_sector +
conf->disks[i].rdev->sectors;
/*
* conf->raid_disks is copy of mddev->raid_disks. The reason to
* keep a copy of mddev->raid_disks in struct linear_conf is,
* mddev->raid_disks may not be consistent with pointers number of
* conf->disks[] when it is updated in linear_add() and used to
* iterate old conf->disks[] earray in linear_congested().
* Here conf->raid_disks is always consitent with number of
* pointers in conf->disks[] array, and mddev->private is updated
* with rcu_assign_pointer() in linear_addr(), such race can be
* avoided.
*/
conf->raid_disks = raid_disks;
return conf;
out:
@@ -196,15 +217,24 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
if (!newconf)
return -ENOMEM;
/* newconf->raid_disks already keeps a copy of * the increased
* value of mddev->raid_disks, WARN_ONCE() is just used to make
* sure of this. It is possible that oldconf is still referenced
* in linear_congested(), therefore kfree_rcu() is used to free
* oldconf until no one uses it anymore.
*/
mddev_suspend(mddev);
oldconf = mddev->private;
oldconf = rcu_dereference_protected(mddev->private,
lockdep_is_held(&mddev->reconfig_mutex));
mddev->raid_disks++;
mddev->private = newconf;
WARN_ONCE(mddev->raid_disks != newconf->raid_disks,
"copied raid_disks doesn't match mddev->raid_disks");
rcu_assign_pointer(mddev->private, newconf);
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
set_capacity(mddev->gendisk, mddev->array_sectors);
mddev_resume(mddev);
revalidate_disk(mddev->gendisk);
kfree(oldconf);
kfree_rcu(oldconf, rcu);
return 0;
}
@@ -262,6 +292,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
split, disk_devt(mddev->gendisk),
bio_sector);
mddev_check_writesame(mddev, split);
generic_make_request(split);
}
} while (split != bio);