Merge tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:

 - Two NVMe pull requests:
     - ana log parse fix from Anton
     - nvme quirks support for Apple devices from Ben
     - fix missing bio completion tracing for multipath stack devices
       from Hannes and Mikhail
     - IP TOS settings for nvme rdma and tcp transports from Israel
     - rq_dma_dir cleanups from Israel
     - tracing for Get LBA Status command from Minwoo
     - Some nvme-tcp cleanups from Minwoo, Potnuri and Myself
     - Some consolidation between the fabrics transports for handling
       the CAP register
     - reset race with ns scanning fix for fabrics (move fabrics
       commands to a dedicated request queue with a different lifetime
       from the admin request queue)."
     - controller reset and namespace scan races fixes
     - nvme discovery log change uevent support
     - naming improvements from Keith
     - multiple discovery controllers reject fix from James
     - some regular cleanups from various people

 - Series fixing (and re-fixing) null_blk debug printing and nr_devices
   checks (André)

 - A few pull requests from Song, with fixes from Andy, Guoqing,
   Guilherme, Neil, Nigel, and Yufen.

 - REQ_OP_ZONE_RESET_ALL support (Chaitanya)

 - Bio merge handling unification (Christoph)

 - Pick default elevator correctly for devices with special needs
   (Damien)

 - Block stats fixes (Hou)

 - Timeout and support devices nbd fixes (Mike)

 - Series fixing races around elevator switching and device add/remove
   (Ming)

 - sed-opal cleanups (Revanth)

 - Per device weight support for BFQ (Fam)

 - Support for blk-iocost, a new model that can properly account cost of
   IO workloads. (Tejun)

 - blk-cgroup writeback fixes (Tejun)

 - paride queue init fixes (zhengbin)

 - blk_set_runtime_active() cleanup (Stanley)

 - Block segment mapping optimizations (Bart)

 - lightnvm fixes (Hans/Minwoo/YueHaibing)

 - Various little fixes and cleanups

* tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block: (186 commits)
  null_blk: format pr_* logs with pr_fmt
  null_blk: match the type of parameter nr_devices
  null_blk: do not fail the module load with zero devices
  block: also check RQF_STATS in blk_mq_need_time_stamp()
  block: make rq sector size accessible for block stats
  bfq: Fix bfq linkage error
  raid5: use bio_end_sector in r5_next_bio
  raid5: remove STRIPE_OPS_REQ_PENDING
  md: add feature flag MD_FEATURE_RAID0_LAYOUT
  md/raid0: avoid RAID0 data corruption due to layout confusion.
  raid5: don't set STRIPE_HANDLE to stripe which is in batch list
  raid5: don't increment read_errors on EILSEQ return
  nvmet: fix a wrong error status returned in error log page
  nvme: send discovery log page change events to userspace
  nvme: add uevent variables for controller devices
  nvme: enable aen regardless of the presence of I/O queues
  nvme-fabrics: allow discovery subsystems accept a kato
  nvmet: Use PTR_ERR_OR_ZERO() in nvmet_init_discovery()
  nvme: Remove redundant assignment of cq vector
  nvme: Assign subsys instance from first ctrl
  ...
This commit is contained in:
Linus Torvalds
2019-09-17 16:57:47 -07:00
107개의 변경된 파일5894개의 추가작업 그리고 1282개의 파일을 삭제

파일 보기

@@ -105,8 +105,14 @@ struct closure_syncer {
static void closure_sync_fn(struct closure *cl)
{
cl->s->done = 1;
wake_up_process(cl->s->task);
struct closure_syncer *s = cl->s;
struct task_struct *p;
rcu_read_lock();
p = READ_ONCE(s->task);
s->done = 1;
wake_up_process(p);
rcu_read_unlock();
}
void __sched __closure_sync(struct closure *cl)

파일 보기

@@ -178,10 +178,9 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
while (size) {
struct keybuf_key *w;
unsigned int bytes = min(i->bytes, size);
int err = copy_to_user(buf, i->buf, bytes);
if (err)
return err;
if (copy_to_user(buf, i->buf, bytes))
return -EFAULT;
ret += bytes;
buf += bytes;

파일 보기

@@ -964,6 +964,7 @@ KTYPE(bch_cache_set_internal);
static int __bch_cache_cmp(const void *l, const void *r)
{
cond_resched();
return *((uint16_t *)r) - *((uint16_t *)l);
}

파일 보기

@@ -408,6 +408,7 @@ static int map_request(struct dm_rq_target_io *tio)
ret = dm_dispatch_clone_request(clone, rq);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
blk_rq_unprep_clone(clone);
blk_mq_cleanup_rq(clone);
tio->ti->type->release_clone_rq(clone, &tio->info);
tio->clone = NULL;
return DM_MAPIO_REQUEUE;
@@ -562,7 +563,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
if (err)
goto out_kfree_tag_set;
q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
q = blk_mq_init_allocated_queue(md->tag_set, md->queue, true);
if (IS_ERR(q)) {
err = PTR_ERR(q);
goto out_tag_set;

파일 보기

@@ -258,6 +258,11 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
bio_sector < start_sector))
goto out_of_bounds;
if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
bio_io_error(bio);
return true;
}
if (unlikely(bio_end_sector(bio) > end_sector)) {
/* This bio crosses a device boundary, so we have to split it */
struct bio *split = bio_split(bio, end_sector - bio_sector,

파일 보기

@@ -376,6 +376,11 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
struct mddev *mddev = q->queuedata;
unsigned int sectors;
if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
bio_io_error(bio);
return BLK_QC_T_NONE;
}
blk_queue_split(q, &bio);
if (mddev == NULL || mddev->pers == NULL) {
@@ -1232,6 +1237,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->new_layout = mddev->layout;
mddev->new_chunk_sectors = mddev->chunk_sectors;
}
if (mddev->level == 0)
mddev->layout = -1;
if (sb->state & (1<<MD_SB_CLEAN))
mddev->recovery_cp = MaxSector;
@@ -1647,6 +1654,10 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
}
if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
sb->level != 0)
return -EINVAL;
if (!refdev) {
ret = 1;
} else {
@@ -1757,6 +1768,10 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->new_chunk_sectors = mddev->chunk_sectors;
}
if (mddev->level == 0 &&
!(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
mddev->layout = -1;
if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
set_bit(MD_HAS_JOURNAL, &mddev->flags);
@@ -1826,8 +1841,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
if (!(le32_to_cpu(sb->feature_map) &
MD_FEATURE_RECOVERY_BITMAP))
rdev->saved_raid_disk = -1;
} else
set_bit(In_sync, &rdev->flags);
} else {
/*
* If the array is FROZEN, then the device can't
* be in_sync with rest of array.
*/
if (!test_bit(MD_RECOVERY_FROZEN,
&mddev->recovery))
set_bit(In_sync, &rdev->flags);
}
rdev->raid_disk = role;
break;
}
@@ -3664,11 +3686,7 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
return -EINVAL;
if (decimals < 0)
decimals = 0;
while (decimals < scale) {
result *= 10;
decimals ++;
}
*res = result;
*res = result * int_pow(10, scale - decimals);
return 0;
}
@@ -4155,12 +4173,17 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
* active-idle
* like active, but no writes have been seen for a while (100msec).
*
* broken
* RAID0/LINEAR-only: same as clean, but array is missing a member.
* It's useful because RAID0/LINEAR mounted-arrays aren't stopped
* when a member is gone, so this state will at least alert the
* user that something is wrong.
*/
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
write_pending, active_idle, bad_word};
write_pending, active_idle, broken, bad_word};
static char *array_states[] = {
"clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
"write-pending", "active-idle", NULL };
"write-pending", "active-idle", "broken", NULL };
static int match_word(const char *word, char **list)
{
@@ -4176,7 +4199,7 @@ array_state_show(struct mddev *mddev, char *page)
{
enum array_state st = inactive;
if (mddev->pers)
if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
switch(mddev->ro) {
case 1:
st = readonly;
@@ -4196,7 +4219,10 @@ array_state_show(struct mddev *mddev, char *page)
st = active;
spin_unlock(&mddev->lock);
}
else {
if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
st = broken;
} else {
if (list_empty(&mddev->disks) &&
mddev->raid_disks == 0 &&
mddev->dev_sectors == 0)
@@ -4310,6 +4336,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
break;
case write_pending:
case active_idle:
case broken:
/* these cannot be set */
break;
}
@@ -5182,6 +5209,34 @@ static struct md_sysfs_entry md_consistency_policy =
__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
consistency_policy_store);
static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%d\n", mddev->fail_last_dev);
}
/*
* Setting fail_last_dev to true to allow last device to be forcibly removed
* from RAID1/RAID10.
*/
static ssize_t
fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
{
int ret;
bool value;
ret = kstrtobool(buf, &value);
if (ret)
return ret;
if (value != mddev->fail_last_dev)
mddev->fail_last_dev = value;
return len;
}
static struct md_sysfs_entry md_fail_last_dev =
__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
fail_last_dev_store);
static struct attribute *md_default_attrs[] = {
&md_level.attr,
&md_layout.attr,
@@ -5198,6 +5253,7 @@ static struct attribute *md_default_attrs[] = {
&md_array_size.attr,
&max_corr_read_errors.attr,
&md_consistency_policy.attr,
&md_fail_last_dev.attr,
NULL,
};
@@ -5744,9 +5800,6 @@ int md_run(struct mddev *mddev)
md_update_sb(mddev, 0);
md_new_event(mddev);
sysfs_notify_dirent_safe(mddev->sysfs_state);
sysfs_notify_dirent_safe(mddev->sysfs_action);
sysfs_notify(&mddev->kobj, NULL, "degraded");
return 0;
bitmap_abort:
@@ -5767,6 +5820,7 @@ static int do_md_run(struct mddev *mddev)
{
int err;
set_bit(MD_NOT_READY, &mddev->flags);
err = md_run(mddev);
if (err)
goto out;
@@ -5787,9 +5841,14 @@ static int do_md_run(struct mddev *mddev)
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
clear_bit(MD_NOT_READY, &mddev->flags);
mddev->changed = 1;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
sysfs_notify_dirent_safe(mddev->sysfs_state);
sysfs_notify_dirent_safe(mddev->sysfs_action);
sysfs_notify(&mddev->kobj, NULL, "degraded");
out:
clear_bit(MD_NOT_READY, &mddev->flags);
return err;
}
@@ -6849,6 +6908,9 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->external = 0;
mddev->layout = info->layout;
if (mddev->level == 0)
/* Cannot trust RAID0 layout info here */
mddev->layout = -1;
mddev->chunk_sectors = info->chunk_size >> 9;
if (mddev->persistent) {
@@ -8900,6 +8962,7 @@ void md_check_recovery(struct mddev *mddev)
if (mddev_trylock(mddev)) {
int spares = 0;
bool try_set_sync = mddev->safemode != 0;
if (!mddev->external && mddev->safemode == 1)
mddev->safemode = 0;
@@ -8945,7 +9008,7 @@ void md_check_recovery(struct mddev *mddev)
}
}
if (!mddev->external && !mddev->in_sync) {
if (try_set_sync && !mddev->external && !mddev->in_sync) {
spin_lock(&mddev->lock);
set_in_sync(mddev);
spin_unlock(&mddev->lock);
@@ -9043,7 +9106,8 @@ void md_reap_sync_thread(struct mddev *mddev)
/* resync has finished, collect result */
md_unregister_thread(&mddev->sync_thread);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
mddev->degraded != mddev->raid_disks) {
/* success...*/
/* activate any spares */
if (mddev->pers->spare_active(mddev)) {

파일 보기

@@ -248,6 +248,12 @@ enum mddev_flags {
MD_UPDATING_SB, /* md_check_recovery is updating the metadata
* without explicitly holding reconfig_mutex.
*/
MD_NOT_READY, /* do_md_run() is active, so 'array_state'
* must not report that array is ready yet
*/
MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop
* I/O in case an array member is gone/failed.
*/
};
enum mddev_sb_flags {
@@ -487,6 +493,7 @@ struct mddev {
unsigned int good_device_nr; /* good device num within cluster raid */
bool has_superblocks:1;
bool fail_last_dev:1;
};
enum recovery_flags {
@@ -735,6 +742,19 @@ extern void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev,
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
{
int flags = rdev->bdev->bd_disk->flags;
if (!(flags & GENHD_FL_UP)) {
if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
pr_warn("md: %s: %s array has a missing/failed member\n",
mdname(rdev->mddev), md_type);
return true;
}
return false;
}
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
int faulty = test_bit(Faulty, &rdev->flags);

파일 보기

@@ -19,6 +19,9 @@
#include "raid0.h"
#include "raid5.h"
static int default_layout = 0;
module_param(default_layout, int, 0644);
#define UNSUPPORTED_MDDEV_FLAGS \
((1L << MD_HAS_JOURNAL) | \
(1L << MD_JOURNAL_CLEAN) | \
@@ -139,6 +142,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
}
pr_debug("md/raid0:%s: FINAL %d zones\n",
mdname(mddev), conf->nr_strip_zones);
if (conf->nr_strip_zones == 1) {
conf->layout = RAID0_ORIG_LAYOUT;
} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
conf->layout = mddev->layout;
} else if (default_layout == RAID0_ORIG_LAYOUT ||
default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
conf->layout = default_layout;
} else {
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
mdname(mddev));
pr_err("md/raid0: please set raid.default_layout to 1 or 2\n");
err = -ENOTSUPP;
goto abort;
}
/*
* now since we have the hard sector sizes, we can make sure
* chunk size is a multiple of that sector size
@@ -547,10 +566,12 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
{
struct r0conf *conf = mddev->private;
struct strip_zone *zone;
struct md_rdev *tmp_dev;
sector_t bio_sector;
sector_t sector;
sector_t orig_sector;
unsigned chunk_sects;
unsigned sectors;
@@ -584,8 +605,26 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
bio = split;
}
orig_sector = sector;
zone = find_zone(mddev->private, &sector);
tmp_dev = map_sector(mddev, zone, sector, &sector);
switch (conf->layout) {
case RAID0_ORIG_LAYOUT:
tmp_dev = map_sector(mddev, zone, orig_sector, &sector);
break;
case RAID0_ALT_MULTIZONE_LAYOUT:
tmp_dev = map_sector(mddev, zone, sector, &sector);
break;
default:
WARN("md/raid0:%s: Invalid layout\n", mdname(mddev));
bio_io_error(bio);
return true;
}
if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
bio_io_error(bio);
return true;
}
bio_set_dev(bio, tmp_dev->bdev);
bio->bi_iter.bi_sector = sector + zone->dev_start +
tmp_dev->data_offset;

파일 보기

@@ -8,11 +8,25 @@ struct strip_zone {
int nb_dev; /* # of devices attached to the zone */
};
/* Linux 3.14 (20d0189b101) made an unintended change to
* the RAID0 layout for multi-zone arrays (where devices aren't all
* the same size.
* RAID0_ORIG_LAYOUT restores the original layout
* RAID0_ALT_MULTIZONE_LAYOUT uses the altered layout
* The layouts are identical when there is only one zone (all
* devices the same size).
*/
enum r0layout {
RAID0_ORIG_LAYOUT = 1,
RAID0_ALT_MULTIZONE_LAYOUT = 2,
};
struct r0conf {
struct strip_zone *strip_zone;
struct md_rdev **devlist; /* lists of rdevs, pointed to
* by strip_zone->dev */
int nr_strip_zones;
enum r0layout layout;
};
#endif

파일 보기

@@ -447,19 +447,21 @@ static void raid1_end_write_request(struct bio *bio)
/* We never try FailFast to WriteMostly devices */
!test_bit(WriteMostly, &rdev->flags)) {
md_error(r1_bio->mddev, rdev);
if (!test_bit(Faulty, &rdev->flags))
/* This is the only remaining device,
* We need to retry the write without
* FailFast
*/
set_bit(R1BIO_WriteError, &r1_bio->state);
else {
/* Finished with this branch */
r1_bio->bios[mirror] = NULL;
to_put = bio;
}
} else
}
/*
* When the device is faulty, it is not necessary to
* handle write error.
* For failfast, this is the only remaining device,
* We need to retry the write without FailFast.
*/
if (!test_bit(Faulty, &rdev->flags))
set_bit(R1BIO_WriteError, &r1_bio->state);
else {
/* Finished with this branch */
r1_bio->bios[mirror] = NULL;
to_put = bio;
}
} else {
/*
* Set R1BIO_Uptodate in our master bio, so that we
@@ -872,8 +874,11 @@ static void flush_pending_writes(struct r1conf *conf)
* backgroup IO calls must call raise_barrier. Once that returns
* there is no normal IO happeing. It must arrange to call
* lower_barrier when the particular background IO completes.
*
* If resync/recovery is interrupted, returns -EINTR;
* Otherwise, returns 0.
*/
static sector_t raise_barrier(struct r1conf *conf, sector_t sector_nr)
static int raise_barrier(struct r1conf *conf, sector_t sector_nr)
{
int idx = sector_to_idx(sector_nr);
@@ -1612,12 +1617,12 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
/*
* If it is not operational, then we have already marked it as dead
* else if it is the last working disks, ignore the error, let the
* next level up know.
* else if it is the last working disks with "fail_last_dev == false",
* ignore the error, let the next level up know.
* else mark the drive as failed
*/
spin_lock_irqsave(&conf->device_lock, flags);
if (test_bit(In_sync, &rdev->flags)
if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
&& (conf->raid_disks - mddev->degraded) == 1) {
/*
* Don't fail the drive, act as though we were just a
@@ -1901,6 +1906,22 @@ static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio)
} while (sectors_to_go > 0);
}
static void put_sync_write_buf(struct r1bio *r1_bio, int uptodate)
{
if (atomic_dec_and_test(&r1_bio->remaining)) {
struct mddev *mddev = r1_bio->mddev;
int s = r1_bio->sectors;
if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
test_bit(R1BIO_WriteError, &r1_bio->state))
reschedule_retry(r1_bio);
else {
put_buf(r1_bio);
md_done_sync(mddev, s, uptodate);
}
}
}
static void end_sync_write(struct bio *bio)
{
int uptodate = !bio->bi_status;
@@ -1927,16 +1948,7 @@ static void end_sync_write(struct bio *bio)
)
set_bit(R1BIO_MadeGood, &r1_bio->state);
if (atomic_dec_and_test(&r1_bio->remaining)) {
int s = r1_bio->sectors;
if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
test_bit(R1BIO_WriteError, &r1_bio->state))
reschedule_retry(r1_bio);
else {
put_buf(r1_bio);
md_done_sync(mddev, s, uptodate);
}
}
put_sync_write_buf(r1_bio, uptodate);
}
static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
@@ -2219,17 +2231,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
generic_make_request(wbio);
}
if (atomic_dec_and_test(&r1_bio->remaining)) {
/* if we're here, all write(s) have completed, so clean up */
int s = r1_bio->sectors;
if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
test_bit(R1BIO_WriteError, &r1_bio->state))
reschedule_retry(r1_bio);
else {
put_buf(r1_bio);
md_done_sync(mddev, s, 1);
}
}
put_sync_write_buf(r1_bio, 1);
}
/*
@@ -3127,6 +3129,13 @@ static int raid1_run(struct mddev *mddev)
!test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
test_bit(Faulty, &conf->mirrors[i].rdev->flags))
mddev->degraded++;
/*
* RAID1 needs at least one disk in active
*/
if (conf->raid_disks - mddev->degraded < 1) {
ret = -EINVAL;
goto abort;
}
if (conf->raid_disks - mddev->degraded == 1)
mddev->recovery_cp = MaxSector;
@@ -3160,8 +3169,12 @@ static int raid1_run(struct mddev *mddev)
ret = md_integrity_register(mddev);
if (ret) {
md_unregister_thread(&mddev->thread);
raid1_free(mddev, conf);
goto abort;
}
return 0;
abort:
raid1_free(mddev, conf);
return ret;
}

파일 보기

@@ -465,19 +465,21 @@ static void raid10_end_write_request(struct bio *bio)
if (test_bit(FailFast, &rdev->flags) &&
(bio->bi_opf & MD_FAILFAST)) {
md_error(rdev->mddev, rdev);
if (!test_bit(Faulty, &rdev->flags))
/* This is the only remaining device,
* We need to retry the write without
* FailFast
*/
set_bit(R10BIO_WriteError, &r10_bio->state);
else {
r10_bio->devs[slot].bio = NULL;
to_put = bio;
dec_rdev = 1;
}
} else
}
/*
* When the device is faulty, it is not necessary to
* handle write error.
* For failfast, this is the only remaining device,
* We need to retry the write without FailFast.
*/
if (!test_bit(Faulty, &rdev->flags))
set_bit(R10BIO_WriteError, &r10_bio->state);
else {
r10_bio->devs[slot].bio = NULL;
to_put = bio;
dec_rdev = 1;
}
}
} else {
/*
@@ -1638,12 +1640,12 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
/*
* If it is not operational, then we have already marked it as dead
* else if it is the last working disks, ignore the error, let the
* next level up know.
* else if it is the last working disks with "fail_last_dev == false",
* ignore the error, let the next level up know.
* else mark the drive as failed
*/
spin_lock_irqsave(&conf->device_lock, flags);
if (test_bit(In_sync, &rdev->flags)
if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
&& !enough(conf, rdev->raid_disk)) {
/*
* Don't fail the drive, just return an IO error.

파일 보기

@@ -2526,7 +2526,8 @@ static void raid5_end_read_request(struct bio * bi)
int set_bad = 0;
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
atomic_inc(&rdev->read_errors);
if (!(bi->bi_status == BLK_STS_PROTECTION))
atomic_inc(&rdev->read_errors);
if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
pr_warn_ratelimited(
"md/raid:%s: read error on replacement device (sector %llu on %s).\n",
@@ -2549,16 +2550,24 @@ static void raid5_end_read_request(struct bio * bi)
(unsigned long long)s,
bdn);
} else if (atomic_read(&rdev->read_errors)
> conf->max_nr_stripes)
pr_warn("md/raid:%s: Too many read errors, failing device %s.\n",
mdname(conf->mddev), bdn);
else
> conf->max_nr_stripes) {
if (!test_bit(Faulty, &rdev->flags)) {
pr_warn("md/raid:%s: %d read_errors > %d stripes\n",
mdname(conf->mddev),
atomic_read(&rdev->read_errors),
conf->max_nr_stripes);
pr_warn("md/raid:%s: Too many read errors, failing device %s.\n",
mdname(conf->mddev), bdn);
}
} else
retry = 1;
if (set_bad && test_bit(In_sync, &rdev->flags)
&& !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
retry = 1;
if (retry)
if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
if (sh->qd_idx >= 0 && sh->pd_idx == i)
set_bit(R5_ReadError, &sh->dev[i].flags);
else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
set_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
} else
@@ -4612,7 +4621,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
(1 << STRIPE_FULL_WRITE) |
(1 << STRIPE_BIOFILL_RUN) |
(1 << STRIPE_COMPUTE_RUN) |
(1 << STRIPE_OPS_REQ_PENDING) |
(1 << STRIPE_DISCARD) |
(1 << STRIPE_BATCH_READY) |
(1 << STRIPE_BATCH_ERR) |
@@ -5491,7 +5499,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
return;
logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9);
last_sector = bio_end_sector(bi);
bi->bi_next = NULL;
@@ -5718,7 +5726,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
do_flush = false;
}
set_bit(STRIPE_HANDLE, &sh->state);
if (!sh->batch_head)
set_bit(STRIPE_HANDLE, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
if ((!sh->batch_head || sh == sh->batch_head) &&
(bi->bi_opf & REQ_SYNC) &&

파일 보기

@@ -357,7 +357,6 @@ enum {
STRIPE_FULL_WRITE, /* all blocks are set to be overwritten */
STRIPE_BIOFILL_RUN,
STRIPE_COMPUTE_RUN,
STRIPE_OPS_REQ_PENDING,
STRIPE_ON_UNPLUG_LIST,
STRIPE_DISCARD,
STRIPE_ON_RELEASE_LIST,
@@ -493,9 +492,7 @@ struct disk_info {
*/
static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
{
int sectors = bio_sectors(bio);
if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
if (bio_end_sector(bio) < sector + STRIPE_SECTORS)
return bio->bi_next;
else
return NULL;