Merge tag 'md/3.13' of git://neil.brown.name/md
Pull md update from Neil Brown:
 "Mostly optimisations and obscure bug fixes.
   - raid5 gets less lock contention
   - raid1 gets less contention between normal-io and resync-io during
     resync"

* tag 'md/3.13' of git://neil.brown.name/md:
  md/raid5: Use conf->device_lock protect changing of multi-thread resources.
  md/raid5: Before freeing old multi-thread worker, it should flush them.
  md/raid5: For stripe with R5_ReadNoMerge, we replace REQ_FLUSH with REQ_NOMERGE.
  UAPI: include <asm/byteorder.h> in linux/raid/md_p.h
  raid1: Rewrite the implementation of iobarrier.
  raid1: Add some macros to make code clearly.
  raid1: Replace raise_barrier/lower_barrier with freeze_array/unfreeze_array when reconfiguring the array.
  raid1: Add a field array_frozen to indicate whether raid in freeze state.
  md: Convert use of typedef ctl_table to struct ctl_table
  md/raid5: avoid deadlock when raid5 array has unack badblocks during md_stop_writes.
  md: use MD_RECOVERY_INTR instead of kthread_should_stop in resync thread.
  md: fix some places where mddev_lock return value is not checked.
  raid5: Retry R5_ReadNoMerge flag when hit a read error.
  raid5: relieve lock contention in get_active_stripe()
  raid5: relieve lock contention in get_active_stripe()
  wait: add wait_event_cmd()
  md/raid5.c: add proper locking to error path of raid5_start_reshape.
  md: fix calculation of stacking limits on level change.
  raid5: Use slow_path to release stripe when mddev->thread is null
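One item in the list above, "wait: add wait_event_cmd()", introduces a generic helper that the raid5 lock-contention work uses to drop a spinlock for the duration of a sleep and re-take it on wake-up. A minimal sketch of the pattern follows; the lock, condition and list names are illustrative stand-ins, not the exact upstream call site in get_active_stripe():

	/* Sketch only: wait for an inactive stripe while releasing
	 * device_lock across the sleep.  wait_event_cmd(wq, cond, cmd1, cmd2)
	 * checks cond with the lock still held, runs cmd1 before schedule()
	 * and cmd2 after waking, then re-checks cond, so the lock is never
	 * held while sleeping.
	 */
	spin_lock_irq(&conf->device_lock);
	wait_event_cmd(conf->wait_for_stripe,
		       !list_empty(&conf->inactive_list),
		       spin_unlock_irq(&conf->device_lock),	/* cmd1: before sleeping */
		       spin_lock_irq(&conf->device_lock));	/* cmd2: after waking */
	sh = list_first_entry(&conf->inactive_list, struct stripe_head, lru);
	spin_unlock_irq(&conf->device_lock);

The helper keeps the usual prepare_to_wait/check/schedule loop in one place while letting the caller supply the unlock and relock steps as the two trailing arguments, which is what makes the finer-grained locking in get_active_stripe() practical.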
 drivers/md/md.c | 133
@@ -112,7 +112,7 @@ static inline int speed_max(struct mddev *mddev)
 
 static struct ctl_table_header *raid_table_header;
 
-static ctl_table raid_table[] = {
+static struct ctl_table raid_table[] = {
 	{
 		.procname	= "speed_limit_min",
 		.data		= &sysctl_speed_limit_min,
@@ -130,7 +130,7 @@ static ctl_table raid_table[] = {
 	{ }
 };
 
-static ctl_table raid_dir_table[] = {
+static struct ctl_table raid_dir_table[] = {
 	{
 		.procname	= "raid",
 		.maxlen		= 0,
@@ -140,7 +140,7 @@ static ctl_table raid_dir_table[] = {
 	{ }
 };
 
-static ctl_table raid_root_table[] = {
+static struct ctl_table raid_root_table[] = {
 	{
 		.procname	= "dev",
 		.maxlen		= 0,
@@ -562,11 +562,19 @@ static struct mddev * mddev_find(dev_t unit)
 	goto retry;
 }
 
-static inline int mddev_lock(struct mddev * mddev)
+static inline int __must_check mddev_lock(struct mddev * mddev)
 {
 	return mutex_lock_interruptible(&mddev->reconfig_mutex);
 }
 
+/* Sometimes we need to take the lock in a situation where
+ * failure due to interrupts is not acceptable.
+ */
+static inline void mddev_lock_nointr(struct mddev * mddev)
+{
+	mutex_lock(&mddev->reconfig_mutex);
+}
+
 static inline int mddev_is_locked(struct mddev *mddev)
 {
 	return mutex_is_locked(&mddev->reconfig_mutex);
@@ -2978,7 +2986,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
 		for_each_mddev(mddev, tmp) {
 			struct md_rdev *rdev2;
 
-			mddev_lock(mddev);
+			mddev_lock_nointr(mddev);
 			rdev_for_each(rdev2, mddev)
 				if (rdev->bdev == rdev2->bdev &&
 				    rdev != rdev2 &&
@@ -2994,7 +3002,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
 				break;
 			}
 		}
-		mddev_lock(my_mddev);
+		mddev_lock_nointr(my_mddev);
 		if (overlap) {
 			/* Someone else could have slipped in a size
 			 * change here, but doing so is just silly.
@@ -3580,6 +3588,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->in_sync = 1;
 		del_timer_sync(&mddev->safemode_timer);
 	}
+	blk_set_stacking_limits(&mddev->queue->limits);
 	pers->run(mddev);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 	mddev_resume(mddev);
@@ -5258,7 +5267,7 @@ static void __md_stop_writes(struct mddev *mddev)
 
 void md_stop_writes(struct mddev *mddev)
 {
-	mddev_lock(mddev);
+	mddev_lock_nointr(mddev);
 	__md_stop_writes(mddev);
 	mddev_unlock(mddev);
 }
@@ -5291,20 +5300,35 @@ EXPORT_SYMBOL_GPL(md_stop);
 static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 {
 	int err = 0;
+	int did_freeze = 0;
+
+	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+		did_freeze = 1;
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+		md_wakeup_thread(mddev->thread);
+	}
+	if (mddev->sync_thread) {
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+		/* Thread might be blocked waiting for metadata update
+		 * which will now never happen */
+		wake_up_process(mddev->sync_thread->tsk);
+	}
+	mddev_unlock(mddev);
+	wait_event(resync_wait, mddev->sync_thread == NULL);
+	mddev_lock_nointr(mddev);
+
 	mutex_lock(&mddev->open_mutex);
-	if (atomic_read(&mddev->openers) > !!bdev) {
+	if (atomic_read(&mddev->openers) > !!bdev ||
+	    mddev->sync_thread ||
+	    (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
 		printk("md: %s still in use.\n",mdname(mddev));
+		if (did_freeze) {
+			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+			md_wakeup_thread(mddev->thread);
+		}
 		err = -EBUSY;
 		goto out;
 	}
-	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
-		/* Someone opened the device since we flushed it
-		 * so page cache could be dirty and it is too late
-		 * to flush.  So abort
-		 */
-		mutex_unlock(&mddev->open_mutex);
-		return -EBUSY;
-	}
 	if (mddev->pers) {
 		__md_stop_writes(mddev);
 
@@ -5315,7 +5339,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 		set_disk_ro(mddev->gendisk, 1);
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
-		err = 0;	
+		err = 0;
 	}
 out:
 	mutex_unlock(&mddev->open_mutex);
@@ -5331,20 +5355,34 @@ static int do_md_stop(struct mddev * mddev, int mode,
 {
 	struct gendisk *disk = mddev->gendisk;
 	struct md_rdev *rdev;
+	int did_freeze = 0;
+
+	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+		did_freeze = 1;
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+		md_wakeup_thread(mddev->thread);
+	}
+	if (mddev->sync_thread) {
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+		/* Thread might be blocked waiting for metadata update
+		 * which will now never happen */
+		wake_up_process(mddev->sync_thread->tsk);
+	}
+	mddev_unlock(mddev);
+	wait_event(resync_wait, mddev->sync_thread == NULL);
+	mddev_lock_nointr(mddev);
 
 	mutex_lock(&mddev->open_mutex);
 	if (atomic_read(&mddev->openers) > !!bdev ||
-	    mddev->sysfs_active) {
+	    mddev->sysfs_active ||
+	    mddev->sync_thread ||
+	    (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
 		printk("md: %s still in use.\n",mdname(mddev));
 		mutex_unlock(&mddev->open_mutex);
-		return -EBUSY;
-	}
-	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
-		/* Someone opened the device since we flushed it
-		 * so page cache could be dirty and it is too late
-		 * to flush.  So abort
-		 */
-		mutex_unlock(&mddev->open_mutex);
+		if (did_freeze) {
+			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+			md_wakeup_thread(mddev->thread);
+		}
 		return -EBUSY;
 	}
 	if (mddev->pers) {
@@ -6551,7 +6589,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 				wait_event(mddev->sb_wait,
 					   !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
 					   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
-				mddev_lock(mddev);
+				mddev_lock_nointr(mddev);
 			}
 		} else {
 			err = -EROFS;
@@ -7361,9 +7399,6 @@ void md_do_sync(struct md_thread *thread)
 		mddev->curr_resync = 2;
 
  try_again:
-	if (kthread_should_stop())
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
 	if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
 		goto skip;
 	for_each_mddev(mddev2, tmp) {
@@ -7388,7 +7423,7 @@ void md_do_sync(struct md_thread *thread)
 				 * be caught by 'softlockup'
 				 */
 				prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
-				if (!kthread_should_stop() &&
+				if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 				    mddev2->curr_resync >= mddev->curr_resync) {
 					printk(KERN_INFO "md: delaying %s of %s"
 					       " until %s has finished (they"
@@ -7464,7 +7499,7 @@ void md_do_sync(struct md_thread *thread)
 	last_check = 0;
 
 	if (j>2) {
-		printk(KERN_INFO 
+		printk(KERN_INFO
 			"md: resuming %s of %s from checkpoint.\n",
 			desc, mdname(mddev));
 		mddev->curr_resync = j;
@@ -7501,7 +7536,8 @@ void md_do_sync(struct md_thread *thread)
 			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 		}
 
-		while (j >= mddev->resync_max && !kthread_should_stop()) {
+		while (j >= mddev->resync_max &&
+		       !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 			/* As this condition is controlled by user-space,
 			 * we can block indefinitely, so use '_interruptible'
 			 * to avoid triggering warnings.
@@ -7509,17 +7545,18 @@ void md_do_sync(struct md_thread *thread)
 			flush_signals(current); /* just in case */
 			wait_event_interruptible(mddev->recovery_wait,
 						 mddev->resync_max > j
-						 || kthread_should_stop());
+						 || test_bit(MD_RECOVERY_INTR,
+							     &mddev->recovery));
 		}
 
-		if (kthread_should_stop())
-			goto interrupted;
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+			break;
 
 		sectors = mddev->pers->sync_request(mddev, j, &skipped,
 						  currspeed < speed_min(mddev));
 		if (sectors == 0) {
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			goto out;
+			break;
 		}
 
 		if (!skipped) { /* actual IO requested */
@@ -7556,10 +7593,8 @@ void md_do_sync(struct md_thread *thread)
 			last_mark = next;
 		}
 
-
-		if (kthread_should_stop())
-			goto interrupted;
-
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+			break;
 
 		/*
 		 * this loop exits only if either when we are slower than
@@ -7582,11 +7617,12 @@ void md_do_sync(struct md_thread *thread)
 			}
 		}
 	}
-	printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
+	printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
+	       test_bit(MD_RECOVERY_INTR, &mddev->recovery)
+	       ? "interrupted" : "done");
 	/*
 	 * this also signals 'finished resyncing' to md_stop
 	 */
- out:
 	blk_finish_plug(&plug);
 	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
@@ -7640,16 +7676,6 @@ void md_do_sync(struct md_thread *thread)
 	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 	return;
-
- interrupted:
-	/*
-	 * got a signal, exit.
-	 */
-	printk(KERN_INFO
-	       "md: md_do_sync() got signal ... exiting\n");
-	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-	goto out;
-
 }
 EXPORT_SYMBOL_GPL(md_do_sync);
 
@@ -7894,6 +7920,7 @@ void md_reap_sync_thread(struct mddev *mddev)
 
 	/* resync has finished, collect result */
 	md_unregister_thread(&mddev->sync_thread);
+	wake_up(&resync_wait);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 		/* success...*/