Merge tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:

 - Two NVMe pull requests:
     - ana log parse fix from Anton
     - nvme quirks support for Apple devices from Ben
     - fix missing bio completion tracing for multipath stack devices
       from Hannes and Mikhail
     - IP TOS settings for nvme rdma and tcp transports from Israel
     - rq_dma_dir cleanups from Israel
     - tracing for Get LBA Status command from Minwoo
     - Some nvme-tcp cleanups from Minwoo, Potnuri and Myself
     - Some consolidation between the fabrics transports for handling
       the CAP register
     - reset race with ns scanning fix for fabrics (move fabrics
       commands to a dedicated request queue with a different lifetime
       from the admin request queue)."
     - controller reset and namespace scan races fixes
     - nvme discovery log change uevent support
     - naming improvements from Keith
     - multiple discovery controllers reject fix from James
     - some regular cleanups from various people

 - Series fixing (and re-fixing) null_blk debug printing and nr_devices
   checks (André)

 - A few pull requests from Song, with fixes from Andy, Guoqing,
   Guilherme, Neil, Nigel, and Yufen.

 - REQ_OP_ZONE_RESET_ALL support (Chaitanya)

 - Bio merge handling unification (Christoph)

 - Pick default elevator correctly for devices with special needs
   (Damien)

 - Block stats fixes (Hou)

 - Timeout and support devices nbd fixes (Mike)

 - Series fixing races around elevator switching and device add/remove
   (Ming)

 - sed-opal cleanups (Revanth)

 - Per device weight support for BFQ (Fam)

 - Support for blk-iocost, a new model that can properly account cost
   of IO workloads. (Tejun)

 - blk-cgroup writeback fixes (Tejun)

 - paride queue init fixes (zhengbin)

 - blk_set_runtime_active() cleanup (Stanley)

 - Block segment mapping optimizations (Bart)

 - lightnvm fixes (Hans/Minwoo/YueHaibing)

 - Various little fixes and cleanups

* tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block: (186 commits)
  null_blk: format pr_* logs with pr_fmt
  null_blk: match the type of parameter nr_devices
  null_blk: do not fail the module load with zero devices
  block: also check RQF_STATS in blk_mq_need_time_stamp()
  block: make rq sector size accessible for block stats
  bfq: Fix bfq linkage error
  raid5: use bio_end_sector in r5_next_bio
  raid5: remove STRIPE_OPS_REQ_PENDING
  md: add feature flag MD_FEATURE_RAID0_LAYOUT
  md/raid0: avoid RAID0 data corruption due to layout confusion.
  raid5: don't set STRIPE_HANDLE to stripe which is in batch list
  raid5: don't increment read_errors on EILSEQ return
  nvmet: fix a wrong error status returned in error log page
  nvme: send discovery log page change events to userspace
  nvme: add uevent variables for controller devices
  nvme: enable aen regardless of the presence of I/O queues
  nvme-fabrics: allow discovery subsystems accept a kato
  nvmet: Use PTR_ERR_OR_ZERO() in nvmet_init_discovery()
  nvme: Remove redundant assignment of cq vector
  nvme: Assign subsys instance from first ctrl
  ...
@@ -105,8 +105,14 @@ struct closure_syncer {
 
 static void closure_sync_fn(struct closure *cl)
 {
-        cl->s->done = 1;
-        wake_up_process(cl->s->task);
+        struct closure_syncer *s = cl->s;
+        struct task_struct *p;
+
+        rcu_read_lock();
+        p = READ_ONCE(s->task);
+        s->done = 1;
+        wake_up_process(p);
+        rcu_read_unlock();
 }
 
 void __sched __closure_sync(struct closure *cl)
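Note on the hunk above: the closure_syncer lives on the waiter's stack in __closure_sync(), so the moment the waker publishes s->done = 1 the waiter may observe it, return, and reuse that stack memory; the old code then dereferenced cl->s->task after the fact. The rewrite snapshots the task pointer first and holds rcu_read_lock() so the task_struct cannot go away under wake_up_process(). A compilable userspace analog of the ordering rule (hypothetical names, pthreads standing in for the scheduler):

    /* build: cc -pthread -o sync_race sync_race.c */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct syncer {
            pthread_t task;         /* handle the waker needs later */
            atomic_int done;
    };

    static struct syncer *shared;   /* stands in for cl->s */

    static void *waker(void *unused)
    {
            struct syncer *s = shared;
            pthread_t p = s->task;     /* snapshot BEFORE publishing done,
                                        * like READ_ONCE(s->task) in the patch */

            atomic_store(&s->done, 1); /* waiter may now return; *s must not
                                        * be touched again after this store */
            (void)p;                   /* the kernel does wake_up_process(p) here */
            return NULL;
    }

    int main(void)
    {
            struct syncer s = { .task = pthread_self(), .done = 0 };
            pthread_t t;

            shared = &s;
            pthread_create(&t, NULL, waker, NULL);
            while (!atomic_load(&s.done))
                    ;                  /* the kernel sleeps here instead of spinning */
            pthread_join(t, NULL);
            printf("synced\n");
            return 0;
    }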
@@ -178,10 +178,9 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
         while (size) {
                 struct keybuf_key *w;
                 unsigned int bytes = min(i->bytes, size);
-                int err = copy_to_user(buf, i->buf, bytes);
 
-                if (err)
-                        return err;
+                if (copy_to_user(buf, i->buf, bytes))
+                        return -EFAULT;
 
                 ret += bytes;
                 buf += bytes;
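For context: copy_to_user() returns the number of bytes it could not copy (0 on success), not a negative errno, so the old code leaked a positive byte count to user space as a read() return value. The conventional shape, sketched with placeholder names:

    /* sketch; 'ubuf'/'kbuf' are placeholder names */
    if (copy_to_user(ubuf, kbuf, bytes))
            return -EFAULT;         /* never return the raw uncopied count */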
@@ -964,6 +964,7 @@ KTYPE(bch_cache_set_internal);
 
 static int __bch_cache_cmp(const void *l, const void *r)
 {
+        cond_resched();
         return *((uint16_t *)r) - *((uint16_t *)l);
 }
 
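This comparator apparently serves bcache's bucket-priority dump, which can sort a very large array, and the in-kernel sort() has no scheduling points of its own; yielding from the comparator avoids soft lockups on huge cache devices. Note the reversed operands: *r - *l orders descending. A userspace sketch of the ordering only (cond_resched() has no analog here):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* same comparator shape: sorts uint16_t values descending */
    static int cmp_desc(const void *l, const void *r)
    {
            return *(const uint16_t *)r - *(const uint16_t *)l;
    }

    int main(void)
    {
            uint16_t prio[] = { 3, 40000, 17, 1024 };

            qsort(prio, 4, sizeof(prio[0]), cmp_desc);
            for (int i = 0; i < 4; i++)
                    printf("%d\n", prio[i]);        /* 40000 1024 17 3 */
            return 0;
    }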
@@ -408,6 +408,7 @@ static int map_request(struct dm_rq_target_io *tio)
                 ret = dm_dispatch_clone_request(clone, rq);
                 if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
                         blk_rq_unprep_clone(clone);
+                        blk_mq_cleanup_rq(clone);
                         tio->ti->type->release_clone_rq(clone, &tio->info);
                         tio->clone = NULL;
                         return DM_MAPIO_REQUEUE;
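blk_mq_cleanup_rq() is new in this series: when dm-rq gives up on a clone and requeues the original request, the lower driver gets a chance to free per-request resources it attached at prepare time (the motivating case was SCSI leaking such state on dm-multipath requeue). From memory the helper is roughly the following; treat the exact shape as an assumption:

    /* sketch, assumed to live in include/linux/blk-mq.h */
    static inline void blk_mq_cleanup_rq(struct request *rq)
    {
            if (rq->q->mq_ops->cleanup_rq)
                    rq->q->mq_ops->cleanup_rq(rq);
    }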
@@ -562,7 +563,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
         if (err)
                 goto out_kfree_tag_set;
 
-        q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
+        q = blk_mq_init_allocated_queue(md->tag_set, md->queue, true);
         if (IS_ERR(q)) {
                 err = PTR_ERR(q);
                 goto out_tag_set;
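The added third argument lines up with the "Pick default elevator correctly for devices with special needs (Damien)" item in the pull message: queue allocation now takes an elevator-init flag, and request-based dm asks for a default elevator here. Assumed prototype after this series (unverified):

    struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
                                                      struct request_queue *q,
                                                      bool elevator_init);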
@@ -258,6 +258,11 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
                      bio_sector < start_sector))
                 goto out_of_bounds;
 
+        if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
+                bio_io_error(bio);
+                return true;
+        }
+
         if (unlikely(bio_end_sector(bio) > end_sector)) {
                 /* This bio crosses a device boundary, so we have to split it */
                 struct bio *split = bio_split(bio, end_sector - bio_sector,
@@ -376,6 +376,11 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
         struct mddev *mddev = q->queuedata;
         unsigned int sectors;
 
+        if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
+                bio_io_error(bio);
+                return BLK_QC_T_NONE;
+        }
+
         blk_queue_split(q, &bio);
 
         if (mddev == NULL || mddev->pers == NULL) {
@@ -1232,6 +1237,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
                 mddev->new_layout = mddev->layout;
                 mddev->new_chunk_sectors = mddev->chunk_sectors;
         }
+        if (mddev->level == 0)
+                mddev->layout = -1;
 
         if (sb->state & (1<<MD_SB_CLEAN))
                 mddev->recovery_cp = MaxSector;
@@ -1647,6 +1654,10 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
                         rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
         }
 
+        if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
+            sb->level != 0)
+                return -EINVAL;
+
         if (!refdev) {
                 ret = 1;
         } else {
@@ -1757,6 +1768,10 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
                 mddev->new_chunk_sectors = mddev->chunk_sectors;
         }
 
+        if (mddev->level == 0 &&
+            !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
+                mddev->layout = -1;
+
         if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
                 set_bit(MD_HAS_JOURNAL, &mddev->flags);
 
@@ -1826,8 +1841,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
                         if (!(le32_to_cpu(sb->feature_map) &
                               MD_FEATURE_RECOVERY_BITMAP))
                                 rdev->saved_raid_disk = -1;
-                } else
-                        set_bit(In_sync, &rdev->flags);
+                } else {
+                        /*
+                         * If the array is FROZEN, then the device can't
+                         * be in_sync with rest of array.
+                         */
+                        if (!test_bit(MD_RECOVERY_FROZEN,
+                                      &mddev->recovery))
+                                set_bit(In_sync, &rdev->flags);
+                }
                 rdev->raid_disk = role;
                 break;
         }
@@ -3664,11 +3686,7 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
                 return -EINVAL;
         if (decimals < 0)
                 decimals = 0;
-        while (decimals < scale) {
-                result *= 10;
-                decimals ++;
-        }
-        *res = result;
+        *res = result * int_pow(10, scale - decimals);
         return 0;
 }
 
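Worked example of the replacement expression: parsing "1.5" with scale = 3 leaves result = 15 and decimals = 1, so *res = 15 * 10^(3-1) = 1500 — exactly what the removed multiply-by-10 loop computed. A standalone check (the real int_pow() lives in the kernel's math library; the one below is a stand-in with the same semantics):

    #include <stdio.h>

    /* stand-in for the kernel's int_pow() */
    static unsigned long long int_pow(unsigned long long base, unsigned int exp)
    {
            unsigned long long r = 1;

            while (exp--)
                    r *= base;
            return r;
    }

    int main(void)
    {
            unsigned long result = 15;      /* digits of "1.5" */
            int decimals = 1, scale = 3;    /* want milli-units */

            /* old loop: result *= 10 while decimals++ < scale  -> 1500 */
            printf("%llu\n", result * int_pow(10, scale - decimals));
            return 0;
    }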
@@ -4155,12 +4173,17 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
  * active-idle
  *     like active, but no writes have been seen for a while (100msec).
  *
+ * broken
+ *     RAID0/LINEAR-only: same as clean, but array is missing a member.
+ *     It's useful because RAID0/LINEAR mounted-arrays aren't stopped
+ *     when a member is gone, so this state will at least alert the
+ *     user that something is wrong.
  */
 enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
-                   write_pending, active_idle, bad_word};
+                   write_pending, active_idle, broken, bad_word};
 static char *array_states[] = {
         "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
-        "write-pending", "active-idle", NULL };
+        "write-pending", "active-idle", "broken", NULL };
 
 static int match_word(const char *word, char **list)
 {
@@ -4176,7 +4199,7 @@ array_state_show(struct mddev *mddev, char *page)
 {
         enum array_state st = inactive;
 
-        if (mddev->pers)
+        if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
                 switch(mddev->ro) {
                 case 1:
                         st = readonly;
@@ -4196,7 +4219,10 @@ array_state_show(struct mddev *mddev, char *page)
                                 st = active;
                         spin_unlock(&mddev->lock);
                 }
-        else {
+
+                if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
+                        st = broken;
+        } else {
                 if (list_empty(&mddev->disks) &&
                     mddev->raid_disks == 0 &&
                     mddev->dev_sectors == 0)
@@ -4310,6 +4336,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
                 break;
         case write_pending:
         case active_idle:
+        case broken:
                 /* these cannot be set */
                 break;
         }
@@ -5182,6 +5209,34 @@ static struct md_sysfs_entry md_consistency_policy =
 __ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
        consistency_policy_store);
 
+static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
+{
+        return sprintf(page, "%d\n", mddev->fail_last_dev);
+}
+
+/*
+ * Setting fail_last_dev to true to allow last device to be forcibly removed
+ * from RAID1/RAID10.
+ */
+static ssize_t
+fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
+{
+        int ret;
+        bool value;
+
+        ret = kstrtobool(buf, &value);
+        if (ret)
+                return ret;
+
+        if (value != mddev->fail_last_dev)
+                mddev->fail_last_dev = value;
+
+        return len;
+}
+static struct md_sysfs_entry md_fail_last_dev =
+__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
+       fail_last_dev_store);
+
 static struct attribute *md_default_attrs[] = {
         &md_level.attr,
         &md_layout.attr,
@@ -5198,6 +5253,7 @@ static struct attribute *md_default_attrs[] = {
         &md_array_size.attr,
         &max_corr_read_errors.attr,
         &md_consistency_policy.attr,
+        &md_fail_last_dev.attr,
         NULL,
 };
 
@@ -5744,9 +5800,6 @@ int md_run(struct mddev *mddev)
                 md_update_sb(mddev, 0);
 
         md_new_event(mddev);
-        sysfs_notify_dirent_safe(mddev->sysfs_state);
-        sysfs_notify_dirent_safe(mddev->sysfs_action);
-        sysfs_notify(&mddev->kobj, NULL, "degraded");
         return 0;
 
 bitmap_abort:
@@ -5767,6 +5820,7 @@ static int do_md_run(struct mddev *mddev)
 {
         int err;
 
+        set_bit(MD_NOT_READY, &mddev->flags);
         err = md_run(mddev);
         if (err)
                 goto out;
@@ -5787,9 +5841,14 @@ static int do_md_run(struct mddev *mddev)
 
         set_capacity(mddev->gendisk, mddev->array_sectors);
         revalidate_disk(mddev->gendisk);
+        clear_bit(MD_NOT_READY, &mddev->flags);
         mddev->changed = 1;
         kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
+        sysfs_notify_dirent_safe(mddev->sysfs_state);
+        sysfs_notify_dirent_safe(mddev->sysfs_action);
+        sysfs_notify(&mddev->kobj, NULL, "degraded");
 out:
+        clear_bit(MD_NOT_READY, &mddev->flags);
         return err;
 }
@@ -6849,6 +6908,9 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
         mddev->external      = 0;
 
         mddev->layout        = info->layout;
+        if (mddev->level == 0)
+                /* Cannot trust RAID0 layout info here */
+                mddev->layout = -1;
         mddev->chunk_sectors = info->chunk_size >> 9;
 
         if (mddev->persistent) {
@@ -8900,6 +8962,7 @@ void md_check_recovery(struct mddev *mddev)
 
         if (mddev_trylock(mddev)) {
                 int spares = 0;
+                bool try_set_sync = mddev->safemode != 0;
 
                 if (!mddev->external && mddev->safemode == 1)
                         mddev->safemode = 0;
@@ -8945,7 +9008,7 @@ void md_check_recovery(struct mddev *mddev)
                         }
                 }
 
-                if (!mddev->external && !mddev->in_sync) {
+                if (try_set_sync && !mddev->external && !mddev->in_sync) {
                         spin_lock(&mddev->lock);
                         set_in_sync(mddev);
                         spin_unlock(&mddev->lock);
@@ -9043,7 +9106,8 @@ void md_reap_sync_thread(struct mddev *mddev)
         /* resync has finished, collect result */
         md_unregister_thread(&mddev->sync_thread);
         if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
-            !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+            !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+            mddev->degraded != mddev->raid_disks) {
                 /* success...*/
                 /* activate any spares */
                 if (mddev->pers->spare_active(mddev)) {
@@ -248,6 +248,12 @@ enum mddev_flags {
         MD_UPDATING_SB,         /* md_check_recovery is updating the metadata
                                  * without explicitly holding reconfig_mutex.
                                  */
+        MD_NOT_READY,           /* do_md_run() is active, so 'array_state'
+                                 * must not report that array is ready yet
+                                 */
+        MD_BROKEN,              /* This is used in RAID-0/LINEAR only, to stop
+                                 * I/O in case an array member is gone/failed.
+                                 */
 };
 
 enum mddev_sb_flags {
@@ -487,6 +493,7 @@ struct mddev {
         unsigned int                    good_device_nr; /* good device num within cluster raid */
 
         bool    has_superblocks:1;
+        bool    fail_last_dev:1;
 };
 
 enum recovery_flags {
@@ -735,6 +742,19 @@ extern void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev,
 struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
 struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
 
+static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
+{
+        int flags = rdev->bdev->bd_disk->flags;
+
+        if (!(flags & GENHD_FL_UP)) {
+                if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
+                        pr_warn("md: %s: %s array has a missing/failed member\n",
+                                mdname(rdev->mddev), md_type);
+                return true;
+        }
+        return false;
+}
+
 static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
 {
         int faulty = test_bit(Faulty, &rdev->flags);
@@ -19,6 +19,9 @@
 #include "raid0.h"
 #include "raid5.h"
 
+static int default_layout = 0;
+module_param(default_layout, int, 0644);
+
 #define UNSUPPORTED_MDDEV_FLAGS         \
         ((1L << MD_HAS_JOURNAL) |       \
          (1L << MD_JOURNAL_CLEAN) |     \
@@ -139,6 +142,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
         }
         pr_debug("md/raid0:%s: FINAL %d zones\n",
                  mdname(mddev), conf->nr_strip_zones);
+
+        if (conf->nr_strip_zones == 1) {
+                conf->layout = RAID0_ORIG_LAYOUT;
+        } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
+                   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+                conf->layout = mddev->layout;
+        } else if (default_layout == RAID0_ORIG_LAYOUT ||
+                   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+                conf->layout = default_layout;
+        } else {
+                pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
+                       mdname(mddev));
+                pr_err("md/raid0: please set raid.default_layout to 1 or 2\n");
+                err = -ENOTSUPP;
+                goto abort;
+        }
         /*
          * now since we have the hard sector sizes, we can make sure
          * chunk size is a multiple of that sector size
@@ -547,10 +566,12 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
 
 static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
 {
+        struct r0conf *conf = mddev->private;
         struct strip_zone *zone;
         struct md_rdev *tmp_dev;
         sector_t bio_sector;
         sector_t sector;
+        sector_t orig_sector;
         unsigned chunk_sects;
         unsigned sectors;
 
@@ -584,8 +605,26 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
                 bio = split;
         }
 
+        orig_sector = sector;
         zone = find_zone(mddev->private, &sector);
-        tmp_dev = map_sector(mddev, zone, sector, &sector);
+        switch (conf->layout) {
+        case RAID0_ORIG_LAYOUT:
+                tmp_dev = map_sector(mddev, zone, orig_sector, &sector);
+                break;
+        case RAID0_ALT_MULTIZONE_LAYOUT:
+                tmp_dev = map_sector(mddev, zone, sector, &sector);
+                break;
+        default:
+                WARN("md/raid0:%s: Invalid layout\n", mdname(mddev));
+                bio_io_error(bio);
+                return true;
+        }
+
+        if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
+                bio_io_error(bio);
+                return true;
+        }
+
         bio_set_dev(bio, tmp_dev->bdev);
         bio->bi_iter.bi_sector = sector + zone->dev_start +
                 tmp_dev->data_offset;
@@ -8,11 +8,25 @@ struct strip_zone {
         int      nb_dev;        /* # of devices attached to the zone */
 };
 
+/* Linux 3.14 (20d0189b101) made an unintended change to
+ * the RAID0 layout for multi-zone arrays (where devices aren't all
+ * the same size.
+ * RAID0_ORIG_LAYOUT restores the original layout
+ * RAID0_ALT_MULTIZONE_LAYOUT uses the altered layout
+ * The layouts are identical when there is only one zone (all
+ * devices the same size).
+ */
+
+enum r0layout {
+        RAID0_ORIG_LAYOUT = 1,
+        RAID0_ALT_MULTIZONE_LAYOUT = 2,
+};
 struct r0conf {
         struct strip_zone       *strip_zone;
         struct md_rdev          **devlist;      /* lists of rdevs, pointed to
                                                  * by strip_zone->dev */
         int                     nr_strip_zones;
+        enum r0layout           layout;
 };
 
 #endif
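Why two layouts exist at all: in a multi-zone array the later zones stripe over fewer members, and the pre-3.14 code picked the member from the array-wide chunk number while the post-3.14 code picks it from the chunk number inside the zone. An array written under one convention and assembled under the other therefore reads the wrong member for zones past the first whenever a zone's starting chunk count is not a multiple of its width. A toy model (hypothetical geometry, not the kernel's map_sector() math, which also handles chunk size and sector offsets):

    #include <stdio.h>

    /* three members of 3, 5 and 5 chunks: zone 0 = chunks 0..8 across
     * all three, zone 1 = chunks 9..12 across the two larger members */
    int main(void)
    {
            int zone1_devs[2] = { 1, 2 };
            int zone1_start = 9;

            for (int c = zone1_start; c <= 12; c++) {
                    int orig = zone1_devs[c % 2];                   /* array-wide chunk */
                    int alt  = zone1_devs[(c - zone1_start) % 2];   /* zone-relative chunk */

                    printf("chunk %2d: ORIG dev%d  ALT dev%d%s\n", c, orig, alt,
                           orig != alt ? "   <-- mismatch" : "");
            }
            return 0;
    }

Here every zone-1 chunk lands on a different member under the two conventions, which is exactly the data-corruption scenario the new default_layout / MD_FEATURE_RAID0_LAYOUT machinery refuses to guess about.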
@@ -447,19 +447,21 @@ static void raid1_end_write_request(struct bio *bio)
                     /* We never try FailFast to WriteMostly devices */
                     !test_bit(WriteMostly, &rdev->flags)) {
                         md_error(r1_bio->mddev, rdev);
-                        if (!test_bit(Faulty, &rdev->flags))
-                                /* This is the only remaining device,
-                                 * We need to retry the write without
-                                 * FailFast
-                                 */
-                                set_bit(R1BIO_WriteError, &r1_bio->state);
-                        else {
-                                /* Finished with this branch */
-                                r1_bio->bios[mirror] = NULL;
-                                to_put = bio;
-                        }
-                } else
+                }
+
+                /*
+                 * When the device is faulty, it is not necessary to
+                 * handle write error.
+                 * For failfast, this is the only remaining device,
+                 * We need to retry the write without FailFast.
+                 */
+                if (!test_bit(Faulty, &rdev->flags))
                         set_bit(R1BIO_WriteError, &r1_bio->state);
+                else {
+                        /* Finished with this branch */
+                        r1_bio->bios[mirror] = NULL;
+                        to_put = bio;
+                }
         } else {
                 /*
                  * Set R1BIO_Uptodate in our master bio, so that we
@@ -872,8 +874,11 @@ static void flush_pending_writes(struct r1conf *conf)
  * backgroup IO calls must call raise_barrier. Once that returns
  * there is no normal IO happeing. It must arrange to call
  * lower_barrier when the particular background IO completes.
+ *
+ * If resync/recovery is interrupted, returns -EINTR;
+ * Otherwise, returns 0.
  */
-static sector_t raise_barrier(struct r1conf *conf, sector_t sector_nr)
+static int raise_barrier(struct r1conf *conf, sector_t sector_nr)
 {
         int idx = sector_to_idx(sector_nr);
 
@@ -1612,12 +1617,12 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 
         /*
          * If it is not operational, then we have already marked it as dead
-         * else if it is the last working disks, ignore the error, let the
-         * next level up know.
+         * else if it is the last working disks with "fail_last_dev == false",
+         * ignore the error, let the next level up know.
          * else mark the drive as failed
          */
         spin_lock_irqsave(&conf->device_lock, flags);
-        if (test_bit(In_sync, &rdev->flags)
+        if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
             && (conf->raid_disks - mddev->degraded) == 1) {
                 /*
                  * Don't fail the drive, act as though we were just a
@@ -1901,6 +1906,22 @@ static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio)
         } while (sectors_to_go > 0);
 }
 
+static void put_sync_write_buf(struct r1bio *r1_bio, int uptodate)
+{
+        if (atomic_dec_and_test(&r1_bio->remaining)) {
+                struct mddev *mddev = r1_bio->mddev;
+                int s = r1_bio->sectors;
+
+                if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+                    test_bit(R1BIO_WriteError, &r1_bio->state))
+                        reschedule_retry(r1_bio);
+                else {
+                        put_buf(r1_bio);
+                        md_done_sync(mddev, s, uptodate);
+                }
+        }
+}
+
 static void end_sync_write(struct bio *bio)
 {
         int uptodate = !bio->bi_status;
@@ -1927,16 +1948,7 @@ static void end_sync_write(struct bio *bio)
                 )
                 set_bit(R1BIO_MadeGood, &r1_bio->state);
 
-        if (atomic_dec_and_test(&r1_bio->remaining)) {
-                int s = r1_bio->sectors;
-                if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
-                    test_bit(R1BIO_WriteError, &r1_bio->state))
-                        reschedule_retry(r1_bio);
-                else {
-                        put_buf(r1_bio);
-                        md_done_sync(mddev, s, uptodate);
-                }
-        }
+        put_sync_write_buf(r1_bio, uptodate);
 }
 
 static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
@@ -2219,17 +2231,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
                 generic_make_request(wbio);
         }
 
-        if (atomic_dec_and_test(&r1_bio->remaining)) {
-                /* if we're here, all write(s) have completed, so clean up */
-                int s = r1_bio->sectors;
-                if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
-                    test_bit(R1BIO_WriteError, &r1_bio->state))
-                        reschedule_retry(r1_bio);
-                else {
-                        put_buf(r1_bio);
-                        md_done_sync(mddev, s, 1);
-                }
-        }
+        put_sync_write_buf(r1_bio, 1);
 }
 
 /*
@@ -3127,6 +3129,13 @@ static int raid1_run(struct mddev *mddev)
                     !test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
                     test_bit(Faulty, &conf->mirrors[i].rdev->flags))
                         mddev->degraded++;
+        /*
+         * RAID1 needs at least one disk in active
+         */
+        if (conf->raid_disks - mddev->degraded < 1) {
+                ret = -EINVAL;
+                goto abort;
+        }
 
         if (conf->raid_disks - mddev->degraded == 1)
                 mddev->recovery_cp = MaxSector;
@@ -3160,8 +3169,12 @@ static int raid1_run(struct mddev *mddev)
         ret = md_integrity_register(mddev);
         if (ret) {
                 md_unregister_thread(&mddev->thread);
-                raid1_free(mddev, conf);
+                goto abort;
         }
         return 0;
+
+abort:
+        raid1_free(mddev, conf);
+        return ret;
 }
 
@@ -465,19 +465,21 @@ static void raid10_end_write_request(struct bio *bio)
                 if (test_bit(FailFast, &rdev->flags) &&
                     (bio->bi_opf & MD_FAILFAST)) {
                         md_error(rdev->mddev, rdev);
-                        if (!test_bit(Faulty, &rdev->flags))
-                                /* This is the only remaining device,
-                                 * We need to retry the write without
-                                 * FailFast
-                                 */
-                                set_bit(R10BIO_WriteError, &r10_bio->state);
-                        else {
-                                r10_bio->devs[slot].bio = NULL;
-                                to_put = bio;
-                                dec_rdev = 1;
-                        }
-                } else
+                }
+
+                /*
+                 * When the device is faulty, it is not necessary to
+                 * handle write error.
+                 * For failfast, this is the only remaining device,
+                 * We need to retry the write without FailFast.
+                 */
+                if (!test_bit(Faulty, &rdev->flags))
                         set_bit(R10BIO_WriteError, &r10_bio->state);
+                else {
+                        r10_bio->devs[slot].bio = NULL;
+                        to_put = bio;
+                        dec_rdev = 1;
+                }
                 }
         } else {
                 /*
@@ -1638,12 +1640,12 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 
         /*
          * If it is not operational, then we have already marked it as dead
-         * else if it is the last working disks, ignore the error, let the
-         * next level up know.
+         * else if it is the last working disks with "fail_last_dev == false",
+         * ignore the error, let the next level up know.
          * else mark the drive as failed
          */
         spin_lock_irqsave(&conf->device_lock, flags);
-        if (test_bit(In_sync, &rdev->flags)
+        if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
             && !enough(conf, rdev->raid_disk)) {
                 /*
                  * Don't fail the drive, just return an IO error.
@@ -2526,7 +2526,8 @@ static void raid5_end_read_request(struct bio * bi)
                 int set_bad = 0;
 
                 clear_bit(R5_UPTODATE, &sh->dev[i].flags);
-                atomic_inc(&rdev->read_errors);
+                if (!(bi->bi_status == BLK_STS_PROTECTION))
+                        atomic_inc(&rdev->read_errors);
                 if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
                         pr_warn_ratelimited(
                                 "md/raid:%s: read error on replacement device (sector %llu on %s).\n",
@@ -2549,16 +2550,24 @@ static void raid5_end_read_request(struct bio * bi)
                                 (unsigned long long)s,
                                 bdn);
                 } else if (atomic_read(&rdev->read_errors)
-                         > conf->max_nr_stripes)
-                        pr_warn("md/raid:%s: Too many read errors, failing device %s.\n",
-                                mdname(conf->mddev), bdn);
-                else
+                         > conf->max_nr_stripes) {
+                        if (!test_bit(Faulty, &rdev->flags)) {
+                                pr_warn("md/raid:%s: %d read_errors > %d stripes\n",
+                                        mdname(conf->mddev),
+                                        atomic_read(&rdev->read_errors),
+                                        conf->max_nr_stripes);
+                                pr_warn("md/raid:%s: Too many read errors, failing device %s.\n",
+                                        mdname(conf->mddev), bdn);
+                        }
+                } else
                         retry = 1;
                 if (set_bad && test_bit(In_sync, &rdev->flags)
                     && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
                         retry = 1;
                 if (retry)
-                        if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
+                        if (sh->qd_idx >= 0 && sh->pd_idx == i)
+                                set_bit(R5_ReadError, &sh->dev[i].flags);
+                        else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
                                 set_bit(R5_ReadError, &sh->dev[i].flags);
                                 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
                         } else
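This pair of raid5 hunks is the "don't increment read_errors on EILSEQ return" item from the pull message: BLK_STS_PROTECTION is the block-layer status that user-visible code sees as -EILSEQ (an integrity/protection failure), and such errors should not push a device toward the too-many-read-errors ejection threshold checked just below. The added guard, written the more usual way:

    /* equivalent form of the added check */
    if (bi->bi_status != BLK_STS_PROTECTION)
            atomic_inc(&rdev->read_errors);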
@@ -4612,7 +4621,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
                                           (1 << STRIPE_FULL_WRITE) |
                                           (1 << STRIPE_BIOFILL_RUN) |
                                           (1 << STRIPE_COMPUTE_RUN)  |
-                                          (1 << STRIPE_OPS_REQ_PENDING) |
                                           (1 << STRIPE_DISCARD) |
                                           (1 << STRIPE_BATCH_READY) |
                                           (1 << STRIPE_BATCH_ERR) |
@@ -5491,7 +5499,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
                 return;
 
         logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
-        last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9);
+        last_sector = bio_end_sector(bi);
 
         bi->bi_next = NULL;
 
@@ -5718,7 +5726,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
                         do_flush = false;
                 }
 
-                set_bit(STRIPE_HANDLE, &sh->state);
+                if (!sh->batch_head)
+                        set_bit(STRIPE_HANDLE, &sh->state);
                 clear_bit(STRIPE_DELAYED, &sh->state);
                 if ((!sh->batch_head || sh == sh->batch_head) &&
                     (bi->bi_opf & REQ_SYNC) &&
@@ -357,7 +357,6 @@ enum {
         STRIPE_FULL_WRITE,      /* all blocks are set to be overwritten */
         STRIPE_BIOFILL_RUN,
         STRIPE_COMPUTE_RUN,
-        STRIPE_OPS_REQ_PENDING,
         STRIPE_ON_UNPLUG_LIST,
         STRIPE_DISCARD,
         STRIPE_ON_RELEASE_LIST,
@@ -493,9 +492,7 @@ struct disk_info {
  */
 static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
 {
-        int sectors = bio_sectors(bio);
-
-        if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
+        if (bio_end_sector(bio) < sector + STRIPE_SECTORS)
                 return bio->bi_next;
         else
                 return NULL;
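Both this hunk and the make_discard_request() one above are pure cleanups: bio_end_sector() already computes the first sector plus the bio's size in sectors. From memory (so treat the exact form as an assumption), the helper in include/linux/bio.h is:

    #define bio_end_sector(bio)  ((bio)->bi_iter.bi_sector + bio_sectors(bio))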