block: Introduce new bio_split()

The new bio_split() can split arbitrary bios - it's not restricted to
single page bios, like the old bio_split() (previously renamed to
bio_pair_split()). It also has different semantics - it doesn't allocate
a struct bio_pair, leaving it up to the caller to handle completions.

Then convert the existing bio_pair_split() users to the new bio_split()
- and also nvme, which was open coding bio splitting.

(We have to take that BUG_ON() out of bio_integrity_trim() because this
bio_split() needs to use it, and there's no reason it has to be used on
bios marked as cloned; BIO_CLONED doesn't seem to have clearly
documented semantics anyways.)

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Neil Brown <neilb@suse.de>
This commit is contained in:
Kent Overstreet
2013-11-23 18:21:01 -08:00
parent ee67891bf1
commit 20d0189b10
10 changed files with 266 additions and 403 deletions

View File

@@ -1152,14 +1152,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
kfree(plug);
}
static void make_request(struct mddev *mddev, struct bio * bio)
static void __make_request(struct mddev *mddev, struct bio *bio)
{
struct r10conf *conf = mddev->private;
struct r10bio *r10_bio;
struct bio *read_bio;
int i;
sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
int chunk_sects = chunk_mask + 1;
const int rw = bio_data_dir(bio);
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
@@ -1174,69 +1172,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
int max_sectors;
int sectors;
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
md_flush_request(mddev, bio);
return;
}
/* If this request crosses a chunk boundary, we need to
* split it. This will only happen for 1 PAGE (or less) requests.
*/
if (unlikely((bio->bi_iter.bi_sector & chunk_mask) + bio_sectors(bio)
> chunk_sects
&& (conf->geo.near_copies < conf->geo.raid_disks
|| conf->prev.near_copies < conf->prev.raid_disks))) {
struct bio_pair *bp;
/* Sanity check -- queue functions should prevent this happening */
if (bio_multiple_segments(bio))
goto bad_map;
/* This is a one page bio that upper layers
* refuse to split for us, so we need to split it.
*/
bp = bio_pair_split(bio, chunk_sects -
(bio->bi_iter.bi_sector & (chunk_sects - 1)));
/* Each of these 'make_request' calls will call 'wait_barrier'.
* If the first succeeds but the second blocks due to the resync
* thread raising the barrier, we will deadlock because the
* IO to the underlying device will be queued in generic_make_request
* and will never complete, so will never reduce nr_pending.
* So increment nr_waiting here so no new raise_barriers will
* succeed, and so the second wait_barrier cannot block.
*/
spin_lock_irq(&conf->resync_lock);
conf->nr_waiting++;
spin_unlock_irq(&conf->resync_lock);
make_request(mddev, &bp->bio1);
make_request(mddev, &bp->bio2);
spin_lock_irq(&conf->resync_lock);
conf->nr_waiting--;
wake_up(&conf->wait_barrier);
spin_unlock_irq(&conf->resync_lock);
bio_pair_release(bp);
return;
bad_map:
printk("md/raid10:%s: make_request bug: can't convert block across chunks"
" or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
(unsigned long long)bio->bi_iter.bi_sector,
bio_sectors(bio) / 2);
bio_io_error(bio);
return;
}
md_write_start(mddev, bio);
/*
* Register the new request and wait if the reconstruction
* thread has put up a bar for new requests.
* Continue immediately if no resync is active currently.
*/
wait_barrier(conf);
sectors = bio_sectors(bio);
while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
bio->bi_iter.bi_sector < conf->reshape_progress &&
@@ -1600,6 +1535,52 @@ retry_write:
goto retry_write;
}
one_write_done(r10_bio);
}
static void make_request(struct mddev *mddev, struct bio *bio)
{
struct r10conf *conf = mddev->private;
sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
int chunk_sects = chunk_mask + 1;
struct bio *split;
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
md_flush_request(mddev, bio);
return;
}
md_write_start(mddev, bio);
/*
* Register the new request and wait if the reconstruction
* thread has put up a bar for new requests.
* Continue immediately if no resync is active currently.
*/
wait_barrier(conf);
do {
/*
* If this request crosses a chunk boundary, we need to split
* it.
*/
if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
bio_sectors(bio) > chunk_sects
&& (conf->geo.near_copies < conf->geo.raid_disks
|| conf->prev.near_copies <
conf->prev.raid_disks))) {
split = bio_split(bio, chunk_sects -
(bio->bi_iter.bi_sector &
(chunk_sects - 1)),
GFP_NOIO, fs_bio_set);
bio_chain(split, bio);
} else {
split = bio;
}
__make_request(mddev, split);
} while (split != bio);
/* In case raid10d snuck in to freeze_array */
wake_up(&conf->wait_barrier);