Merge branch 'for-3.14/core' of git://git.kernel.dk/linux-block
Pull core block IO changes from Jens Axboe:
 "The major piece in here is the immutable bio_vec series from Kent,
  the rest is fairly minor.  It was supposed to go in last round, but
  various issues pushed it to this release instead.

  The pull request contains:

   - Various smaller blk-mq fixes from different folks.  Nothing major
     here, just minor fixes and cleanups.

   - Fix for a memory leak in the error path in the block ioctl code
     from Christian Engelmayer.

   - Header export fix from CaiZhiyong.

   - Finally the immutable biovec changes from Kent Overstreet.  This
     enables some nice future work on making arbitrarily sized bios
     possible, and splitting more efficient.  Related fixes to immutable
     bio_vecs:
        - dm-cache immutable fixup from Mike Snitzer.
        - btrfs immutable fixup from Muthu Kumar.
        - bio-integrity fix from Nic Bellinger, which is also going to
          stable"

* 'for-3.14/core' of git://git.kernel.dk/linux-block: (44 commits)
  xtensa: fixup simdisk driver to work with immutable bio_vecs
  block/blk-mq-cpu.c: use hotcpu_notifier()
  blk-mq: for_each_* macro correctness
  block: Fix memory leak in rw_copy_check_uvector() handling
  bio-integrity: Fix bio_integrity_verify segment start bug
  block: remove unrelated header files and export symbol
  blk-mq: uses page->list incorrectly
  blk-mq: use __smp_call_function_single directly
  btrfs: fix missing increment of bi_remaining
  Revert "block: Warn and free bio if bi_end_io is not set"
  block: Warn and free bio if bi_end_io is not set
  blk-mq: fix initializing request's start time
  block: blk-mq: don't export blk_mq_free_queue()
  block: blk-mq: make blk_sync_queue support mq
  block: blk-mq: support draining mq queue
  dm cache: increment bi_remaining when bi_end_io is restored
  block: fixup for generic bio chaining
  block: Really silence spurious compiler warnings
  block: Silence spurious compiler warnings
  block: Kill bio_pair_split()
  ...
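For orientation, the raid10.c diff below shows the two visible sides of the immutable biovec conversion: the bio's current position and remaining size move under bio->bi_iter (so bio->bi_sector becomes bio->bi_iter.bi_sector, bio->bi_size becomes bio->bi_iter.bi_size), and a driver that must split a request at an alignment boundary now carves it up with bio_split() plus bio_chain() instead of the removed bio_pair_split() machinery. Below is a minimal sketch of that splitting loop, assuming the 3.14 bio API; example_make_request(), __handle_one() and boundary_sectors are hypothetical names standing in for raid10's make_request(), __make_request() and chunk size, and are not part of this merge.

/*
 * Illustrative sketch only (not from this merge): the splitting pattern
 * the new raid10 make_request() in the diff below adopts.
 * 'boundary_sectors' is a hypothetical power-of-two limit standing in
 * for raid10's chunk size; __handle_one() stands in for __make_request().
 */
#include <linux/bio.h>
#include <linux/blkdev.h>

static void __handle_one(struct bio *bio)
{
	/* In raid10 this is __make_request(); here we just pass it on. */
	generic_make_request(bio);
}

static void example_make_request(struct bio *bio, unsigned int boundary_sectors)
{
	struct bio *split;

	do {
		/* Offset of this bio within the current boundary window. */
		sector_t offset = bio->bi_iter.bi_sector & (boundary_sectors - 1);

		if (offset + bio_sectors(bio) > boundary_sectors) {
			/* Carve off everything up to the boundary ... */
			split = bio_split(bio, boundary_sectors - offset,
					  GFP_NOIO, fs_bio_set);
			/* ... and make the parent wait for that piece:
			 * bio_chain() bumps the parent's bi_remaining, so
			 * the parent's bi_end_io only runs once all chained
			 * children have completed.
			 */
			bio_chain(split, bio);
		} else {
			split = bio;
		}

		__handle_one(split);
	} while (split != bio);
}

The bi_remaining accounting added by bio_chain() is also why the dm-cache and btrfs fixups listed above need to increment bi_remaining when they restore or re-run a chained bio's completion.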
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1152,14 +1152,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void make_request(struct mddev *mddev, struct bio * bio)
+static void __make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;
 	struct r10bio *r10_bio;
 	struct bio *read_bio;
 	int i;
-	sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
-	int chunk_sects = chunk_mask + 1;
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
@@ -1174,88 +1172,27 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	int max_sectors;
 	int sectors;
 
-	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
-		md_flush_request(mddev, bio);
-		return;
-	}
-
-	/* If this request crosses a chunk boundary, we need to
-	 * split it.  This will only happen for 1 PAGE (or less) requests.
-	 */
-	if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio)
-		     > chunk_sects
-		     && (conf->geo.near_copies < conf->geo.raid_disks
-			 || conf->prev.near_copies < conf->prev.raid_disks))) {
-		struct bio_pair *bp;
-		/* Sanity check -- queue functions should prevent this happening */
-		if (bio_segments(bio) > 1)
-			goto bad_map;
-		/* This is a one page bio that upper layers
-		 * refuse to split for us, so we need to split it.
-		 */
-		bp = bio_split(bio,
-			       chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
-
-		/* Each of these 'make_request' calls will call 'wait_barrier'.
-		 * If the first succeeds but the second blocks due to the resync
-		 * thread raising the barrier, we will deadlock because the
-		 * IO to the underlying device will be queued in generic_make_request
-		 * and will never complete, so will never reduce nr_pending.
-		 * So increment nr_waiting here so no new raise_barriers will
-		 * succeed, and so the second wait_barrier cannot block.
-		 */
-		spin_lock_irq(&conf->resync_lock);
-		conf->nr_waiting++;
-		spin_unlock_irq(&conf->resync_lock);
-
-		make_request(mddev, &bp->bio1);
-		make_request(mddev, &bp->bio2);
-
-		spin_lock_irq(&conf->resync_lock);
-		conf->nr_waiting--;
-		wake_up(&conf->wait_barrier);
-		spin_unlock_irq(&conf->resync_lock);
-
-		bio_pair_release(bp);
-		return;
-	bad_map:
-		printk("md/raid10:%s: make_request bug: can't convert block across chunks"
-		       " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
-		       (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
-
-		bio_io_error(bio);
-		return;
-	}
-
-	md_write_start(mddev, bio);
-
-	/*
-	 * Register the new request and wait if the reconstruction
-	 * thread has put up a bar for new requests.
-	 * Continue immediately if no resync is active currently.
-	 */
-	wait_barrier(conf);
-
 	sectors = bio_sectors(bio);
 	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio->bi_sector < conf->reshape_progress &&
-	    bio->bi_sector + sectors > conf->reshape_progress) {
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
 		/* IO spans the reshape position.  Need to wait for
 		 * reshape to pass
 		 */
 		allow_barrier(conf);
 		wait_event(conf->wait_barrier,
-			   conf->reshape_progress <= bio->bi_sector ||
-			   conf->reshape_progress >= bio->bi_sector + sectors);
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
 		wait_barrier(conf);
 	}
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    bio_data_dir(bio) == WRITE &&
 	    (mddev->reshape_backwards
-	     ? (bio->bi_sector < conf->reshape_safe &&
-		bio->bi_sector + sectors > conf->reshape_progress)
-	     : (bio->bi_sector + sectors > conf->reshape_safe &&
-		bio->bi_sector < conf->reshape_progress))) {
+	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
+		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
+	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
+		bio->bi_iter.bi_sector < conf->reshape_progress))) {
 		/* Need to update reshape_position in metadata */
 		mddev->reshape_position = conf->reshape_progress;
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1273,7 +1210,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	r10_bio->sectors = sectors;
 
 	r10_bio->mddev = mddev;
-	r10_bio->sector = bio->bi_sector;
+	r10_bio->sector = bio->bi_iter.bi_sector;
 	r10_bio->state = 0;
 
 	/* We might need to issue multiple reads to different
@@ -1302,13 +1239,13 @@ read_again:
 		slot = r10_bio->read_slot;
 
 		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r10_bio->sector - bio->bi_sector,
+		bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
 			 max_sectors);
 
 		r10_bio->devs[slot].bio = read_bio;
 		r10_bio->devs[slot].rdev = rdev;
 
-		read_bio->bi_sector = r10_bio->devs[slot].addr +
+		read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
 			choose_data_offset(r10_bio, rdev);
 		read_bio->bi_bdev = rdev->bdev;
 		read_bio->bi_end_io = raid10_end_read_request;
@@ -1320,7 +1257,7 @@ read_again:
 			 * need another r10_bio.
 			 */
 			sectors_handled = (r10_bio->sector + max_sectors
-					   - bio->bi_sector);
+					   - bio->bi_iter.bi_sector);
 			r10_bio->sectors = max_sectors;
 			spin_lock_irq(&conf->device_lock);
 			if (bio->bi_phys_segments == 0)
@@ -1341,7 +1278,8 @@ read_again:
 			r10_bio->sectors = bio_sectors(bio) - sectors_handled;
 			r10_bio->state = 0;
 			r10_bio->mddev = mddev;
-			r10_bio->sector = bio->bi_sector + sectors_handled;
+			r10_bio->sector = bio->bi_iter.bi_sector +
+				sectors_handled;
 			goto read_again;
 		} else
 			generic_make_request(read_bio);
@@ -1499,7 +1437,8 @@ retry_write:
 			bio->bi_phys_segments++;
 		spin_unlock_irq(&conf->device_lock);
 	}
-	sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector;
+	sectors_handled = r10_bio->sector + max_sectors -
+		bio->bi_iter.bi_sector;
 
 	atomic_set(&r10_bio->remaining, 1);
 	bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
@@ -1510,11 +1449,11 @@ retry_write:
 		if (r10_bio->devs[i].bio) {
 			struct md_rdev *rdev = conf->mirrors[d].rdev;
 			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-			bio_trim(mbio, r10_bio->sector - bio->bi_sector,
+			bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
 				 max_sectors);
 			r10_bio->devs[i].bio = mbio;
 
-			mbio->bi_sector = (r10_bio->devs[i].addr+
+			mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
 					   choose_data_offset(r10_bio,
 							      rdev));
 			mbio->bi_bdev = rdev->bdev;
@@ -1553,11 +1492,11 @@ retry_write:
 				rdev = conf->mirrors[d].rdev;
 			}
 			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-			bio_trim(mbio, r10_bio->sector - bio->bi_sector,
+			bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
 				 max_sectors);
 			r10_bio->devs[i].repl_bio = mbio;
 
-			mbio->bi_sector = (r10_bio->devs[i].addr +
+			mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
 					   choose_data_offset(
 						r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
@@ -1591,11 +1530,57 @@ retry_write:
 		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
 
 		r10_bio->mddev = mddev;
-		r10_bio->sector = bio->bi_sector + sectors_handled;
+		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
 		r10_bio->state = 0;
 		goto retry_write;
 	}
 	one_write_done(r10_bio);
 }
 
+static void make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
+	int chunk_sects = chunk_mask + 1;
+
+	struct bio *split;
+
+	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
+		md_flush_request(mddev, bio);
+		return;
+	}
+
+	md_write_start(mddev, bio);
+
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
+	wait_barrier(conf);
+
+	do {
+
+		/*
+		 * If this request crosses a chunk boundary, we need to split
+		 * it.
+		 */
+		if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
+			     bio_sectors(bio) > chunk_sects
+			     && (conf->geo.near_copies < conf->geo.raid_disks
+				 || conf->prev.near_copies <
+				 conf->prev.raid_disks))) {
+			split = bio_split(bio, chunk_sects -
+					  (bio->bi_iter.bi_sector &
+					   (chunk_sects - 1)),
+					  GFP_NOIO, fs_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
+
+		__make_request(mddev, split);
+	} while (split != bio);
+
+	/* In case raid10d snuck in to freeze_array */
+	wake_up(&conf->wait_barrier);
+}
@@ -2124,10 +2109,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		bio_reset(tbio);
 
 		tbio->bi_vcnt = vcnt;
-		tbio->bi_size = r10_bio->sectors << 9;
+		tbio->bi_iter.bi_size = r10_bio->sectors << 9;
 		tbio->bi_rw = WRITE;
 		tbio->bi_private = r10_bio;
-		tbio->bi_sector = r10_bio->devs[i].addr;
+		tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
 
 		for (j=0; j < vcnt ; j++) {
 			tbio->bi_io_vec[j].bv_offset = 0;
@@ -2144,7 +2129,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		atomic_inc(&r10_bio->remaining);
 		md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
 
-		tbio->bi_sector += conf->mirrors[d].rdev->data_offset;
+		tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
 		tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
 		generic_make_request(tbio);
 	}
@@ -2614,8 +2599,8 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
 		sectors = sect_to_write;
 		/* Write at 'sector' for 'sectors' */
 		wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(wbio, sector - bio->bi_sector, sectors);
-		wbio->bi_sector = (r10_bio->devs[i].addr+
+		bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
+		wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
 				   choose_data_offset(r10_bio, rdev) +
 				   (sector - r10_bio->sector));
 		wbio->bi_bdev = rdev->bdev;
@@ -2687,10 +2672,10 @@ read_more:
 		(unsigned long long)r10_bio->sector);
 	bio = bio_clone_mddev(r10_bio->master_bio,
 			      GFP_NOIO, mddev);
-	bio_trim(bio, r10_bio->sector - bio->bi_sector, max_sectors);
+	bio_trim(bio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors);
 	r10_bio->devs[slot].bio = bio;
 	r10_bio->devs[slot].rdev = rdev;
-	bio->bi_sector = r10_bio->devs[slot].addr
+	bio->bi_iter.bi_sector = r10_bio->devs[slot].addr
 		+ choose_data_offset(r10_bio, rdev);
 	bio->bi_bdev = rdev->bdev;
 	bio->bi_rw = READ | do_sync;
@@ -2701,7 +2686,7 @@ read_more:
 		struct bio *mbio = r10_bio->master_bio;
 		int sectors_handled =
 			r10_bio->sector + max_sectors
-			- mbio->bi_sector;
+			- mbio->bi_iter.bi_sector;
 		r10_bio->sectors = max_sectors;
 		spin_lock_irq(&conf->device_lock);
 		if (mbio->bi_phys_segments == 0)
@@ -2719,7 +2704,7 @@ read_more:
 		set_bit(R10BIO_ReadError,
 			&r10_bio->state);
 		r10_bio->mddev = mddev;
-		r10_bio->sector = mbio->bi_sector
+		r10_bio->sector = mbio->bi_iter.bi_sector
 			+ sectors_handled;
 
 		goto read_more;
@@ -3157,7 +3142,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 				bio->bi_end_io = end_sync_read;
 				bio->bi_rw = READ;
 				from_addr = r10_bio->devs[j].addr;
-				bio->bi_sector = from_addr + rdev->data_offset;
+				bio->bi_iter.bi_sector = from_addr +
+					rdev->data_offset;
 				bio->bi_bdev = rdev->bdev;
 				atomic_inc(&rdev->nr_pending);
 				/* and we write to 'i' (if not in_sync) */
@@ -3181,7 +3167,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 					bio->bi_private = r10_bio;
 					bio->bi_end_io = end_sync_write;
 					bio->bi_rw = WRITE;
-					bio->bi_sector = to_addr
+					bio->bi_iter.bi_sector = to_addr
 						+ rdev->data_offset;
 					bio->bi_bdev = rdev->bdev;
 					atomic_inc(&r10_bio->remaining);
@@ -3210,7 +3196,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 				bio->bi_private = r10_bio;
 				bio->bi_end_io = end_sync_write;
 				bio->bi_rw = WRITE;
-				bio->bi_sector = to_addr + rdev->data_offset;
+				bio->bi_iter.bi_sector = to_addr +
+					rdev->data_offset;
 				bio->bi_bdev = rdev->bdev;
 				atomic_inc(&r10_bio->remaining);
 				break;
@@ -3328,7 +3315,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 		bio->bi_private = r10_bio;
 		bio->bi_end_io = end_sync_read;
 		bio->bi_rw = READ;
-		bio->bi_sector = sector +
+		bio->bi_iter.bi_sector = sector +
 			conf->mirrors[d].rdev->data_offset;
 		bio->bi_bdev = conf->mirrors[d].rdev->bdev;
 		count++;
@@ -3350,7 +3337,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 		bio->bi_private = r10_bio;
 		bio->bi_end_io = end_sync_write;
 		bio->bi_rw = WRITE;
-		bio->bi_sector = sector +
+		bio->bi_iter.bi_sector = sector +
 			conf->mirrors[d].replacement->data_offset;
 		bio->bi_bdev = conf->mirrors[d].replacement->bdev;
 		count++;
@@ -3397,7 +3384,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 			     bio2 = bio2->bi_next) {
 				/* remove last page from this bio */
 				bio2->bi_vcnt--;
-				bio2->bi_size -= len;
+				bio2->bi_iter.bi_size -= len;
 				bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
 			}
 			goto bio_full;
@@ -4418,7 +4405,7 @@ read_more:
 	read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev);
 
 	read_bio->bi_bdev = rdev->bdev;
-	read_bio->bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
+	read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
 			       + rdev->data_offset);
 	read_bio->bi_private = r10_bio;
 	read_bio->bi_end_io = end_sync_read;
@@ -4426,7 +4413,7 @@ read_more:
 	read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
 	read_bio->bi_flags |= 1 << BIO_UPTODATE;
 	read_bio->bi_vcnt = 0;
-	read_bio->bi_size = 0;
+	read_bio->bi_iter.bi_size = 0;
 	r10_bio->master_bio = read_bio;
 	r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
 
@@ -4452,7 +4439,8 @@ read_more:
 
 		bio_reset(b);
 		b->bi_bdev = rdev2->bdev;
-		b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset;
+		b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
+			rdev2->new_data_offset;
 		b->bi_private = r10_bio;
 		b->bi_end_io = end_reshape_write;
 		b->bi_rw = WRITE;
@@ -4479,7 +4467,7 @@ read_more:
 		     bio2 = bio2->bi_next) {
 			/* Remove last page from this bio */
 			bio2->bi_vcnt--;
-			bio2->bi_size -= len;
+			bio2->bi_iter.bi_size -= len;
 			bio2->bi_flags &= ~(1<<BIO_SEG_VALID);
 		}
 		goto bio_full;