Merge branch 'for-3.14/core' of git://git.kernel.dk/linux-block

Pull core block IO changes from Jens Axboe:
 "The major piece in here is the immutable bio_ve series from Kent, the
  rest is fairly minor.  It was supposed to go in last round, but
  various issues pushed it to this release instead.  The pull request
  contains:

   - Various smaller blk-mq fixes from different folks.  Nothing major
     here, just minor fixes and cleanups.

   - Fix from Christian Engelmayer for a memory leak in the error path
     of the block ioctl code.

   - Header export fix from CaiZhiyong.

   - Finally the immutable biovec changes from Kent Overstreet.  This
     enables some nice future work on making arbitrarily sized bios
     possible and on splitting bios more efficiently.  Related fixes for
     immutable bio_vecs:

        - dm-cache immutable fixup from Mike Snitzer.
        - btrfs immutable fixup from Muthu Kumar.

   - bio-integrity fix from Nic Bellinger, which is also going to stable"

* 'for-3.14/core' of git://git.kernel.dk/linux-block: (44 commits)
  xtensa: fixup simdisk driver to work with immutable bio_vecs
  block/blk-mq-cpu.c: use hotcpu_notifier()
  blk-mq: for_each_* macro correctness
  block: Fix memory leak in rw_copy_check_uvector() handling
  bio-integrity: Fix bio_integrity_verify segment start bug
  block: remove unrelated header files and export symbol
  blk-mq: uses page->list incorrectly
  blk-mq: use __smp_call_function_single directly
  btrfs: fix missing increment of bi_remaining
  Revert "block: Warn and free bio if bi_end_io is not set"
  block: Warn and free bio if bi_end_io is not set
  blk-mq: fix initializing request's start time
  block: blk-mq: don't export blk_mq_free_queue()
  block: blk-mq: make blk_sync_queue support mq
  block: blk-mq: support draining mq queue
  dm cache: increment bi_remaining when bi_end_io is restored
  block: fixup for generic bio chaining
  block: Really silence spurious compiler warnings
  block: Silence spurious compiler warnings
  block: Kill bio_pair_split()
  ...
Author: Linus Torvalds
Date:   2014-01-30 11:19:05 -08:00

139 changed files with 2137 additions and 2676 deletions

drivers/md/raid10.c

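The hunks below show the two sides of the immutable biovec conversion in raid10: every bio->bi_sector / bio->bi_size access moves into the embedded iterator bio->bi_iter, and the old one-page bio_pair splitting is replaced by a loop around bio_split() and bio_chain(). As orientation, here is a minimal sketch of that driver-side pattern against the 3.14-era API; struct my_dev, chunk_sectors and my_make_request are hypothetical names used only for illustration, not code from this series:

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Hypothetical device used only for this sketch. */
struct my_dev {
	struct block_device *lower_bdev;	/* device the bios are forwarded to */
	unsigned int chunk_sectors;		/* power-of-two boundary, in sectors */
};

/*
 * Sketch of the split-and-chain pattern: never forward a bio that crosses
 * a chunk boundary; carve off the leading piece with bio_split() and tie
 * it back to the parent with bio_chain(), so the parent's completion
 * waits for every piece.
 */
static void my_make_request(struct my_dev *dev, struct bio *bio)
{
	struct bio *split;

	do {
		/* Position and size now live in bio->bi_iter, not in the bio itself. */
		unsigned int to_boundary = dev->chunk_sectors -
			(bio->bi_iter.bi_sector & (dev->chunk_sectors - 1));

		if (bio_sectors(bio) > to_boundary) {
			split = bio_split(bio, to_boundary, GFP_NOIO, fs_bio_set);
			bio_chain(split, bio);	/* parent completes after all pieces */
		} else {
			split = bio;
		}

		split->bi_bdev = dev->lower_bdev;
		generic_make_request(split);
	} while (split != bio);
}

The new make_request() in the diff follows the same shape, with __make_request() playing the role of the per-piece handler.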
@@ -1152,14 +1152,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
kfree(plug);
}
static void make_request(struct mddev *mddev, struct bio * bio)
static void __make_request(struct mddev *mddev, struct bio *bio)
{
struct r10conf *conf = mddev->private;
struct r10bio *r10_bio;
struct bio *read_bio;
int i;
sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
int chunk_sects = chunk_mask + 1;
const int rw = bio_data_dir(bio);
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
@@ -1174,88 +1172,27 @@ static void make_request(struct mddev *mddev, struct bio * bio)
int max_sectors;
int sectors;
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
md_flush_request(mddev, bio);
return;
}
/* If this request crosses a chunk boundary, we need to
* split it. This will only happen for 1 PAGE (or less) requests.
*/
if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio)
> chunk_sects
&& (conf->geo.near_copies < conf->geo.raid_disks
|| conf->prev.near_copies < conf->prev.raid_disks))) {
struct bio_pair *bp;
/* Sanity check -- queue functions should prevent this happening */
if (bio_segments(bio) > 1)
goto bad_map;
/* This is a one page bio that upper layers
* refuse to split for us, so we need to split it.
*/
bp = bio_split(bio,
chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
/* Each of these 'make_request' calls will call 'wait_barrier'.
* If the first succeeds but the second blocks due to the resync
* thread raising the barrier, we will deadlock because the
* IO to the underlying device will be queued in generic_make_request
* and will never complete, so will never reduce nr_pending.
* So increment nr_waiting here so no new raise_barriers will
* succeed, and so the second wait_barrier cannot block.
*/
spin_lock_irq(&conf->resync_lock);
conf->nr_waiting++;
spin_unlock_irq(&conf->resync_lock);
make_request(mddev, &bp->bio1);
make_request(mddev, &bp->bio2);
spin_lock_irq(&conf->resync_lock);
conf->nr_waiting--;
wake_up(&conf->wait_barrier);
spin_unlock_irq(&conf->resync_lock);
bio_pair_release(bp);
return;
bad_map:
printk("md/raid10:%s: make_request bug: can't convert block across chunks"
" or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
(unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
bio_io_error(bio);
return;
}
md_write_start(mddev, bio);
/*
* Register the new request and wait if the reconstruction
* thread has put up a bar for new requests.
* Continue immediately if no resync is active currently.
*/
wait_barrier(conf);
sectors = bio_sectors(bio);
while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
bio->bi_sector < conf->reshape_progress &&
bio->bi_sector + sectors > conf->reshape_progress) {
bio->bi_iter.bi_sector < conf->reshape_progress &&
bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
/* IO spans the reshape position. Need to wait for
* reshape to pass
*/
allow_barrier(conf);
wait_event(conf->wait_barrier,
conf->reshape_progress <= bio->bi_sector ||
conf->reshape_progress >= bio->bi_sector + sectors);
conf->reshape_progress <= bio->bi_iter.bi_sector ||
conf->reshape_progress >= bio->bi_iter.bi_sector +
sectors);
wait_barrier(conf);
}
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
bio_data_dir(bio) == WRITE &&
(mddev->reshape_backwards
? (bio->bi_sector < conf->reshape_safe &&
bio->bi_sector + sectors > conf->reshape_progress)
: (bio->bi_sector + sectors > conf->reshape_safe &&
bio->bi_sector < conf->reshape_progress))) {
? (bio->bi_iter.bi_sector < conf->reshape_safe &&
bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
: (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
bio->bi_iter.bi_sector < conf->reshape_progress))) {
/* Need to update reshape_position in metadata */
mddev->reshape_position = conf->reshape_progress;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1273,7 +1210,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
r10_bio->sectors = sectors;
r10_bio->mddev = mddev;
r10_bio->sector = bio->bi_sector;
r10_bio->sector = bio->bi_iter.bi_sector;
r10_bio->state = 0;
/* We might need to issue multiple reads to different
@@ -1302,13 +1239,13 @@ read_again:
slot = r10_bio->read_slot;
read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
bio_trim(read_bio, r10_bio->sector - bio->bi_sector,
bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
max_sectors);
r10_bio->devs[slot].bio = read_bio;
r10_bio->devs[slot].rdev = rdev;
read_bio->bi_sector = r10_bio->devs[slot].addr +
read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
choose_data_offset(r10_bio, rdev);
read_bio->bi_bdev = rdev->bdev;
read_bio->bi_end_io = raid10_end_read_request;
@@ -1320,7 +1257,7 @@ read_again:
* need another r10_bio.
*/
sectors_handled = (r10_bio->sector + max_sectors
- bio->bi_sector);
- bio->bi_iter.bi_sector);
r10_bio->sectors = max_sectors;
spin_lock_irq(&conf->device_lock);
if (bio->bi_phys_segments == 0)
@@ -1341,7 +1278,8 @@ read_again:
r10_bio->sectors = bio_sectors(bio) - sectors_handled;
r10_bio->state = 0;
r10_bio->mddev = mddev;
r10_bio->sector = bio->bi_sector + sectors_handled;
r10_bio->sector = bio->bi_iter.bi_sector +
sectors_handled;
goto read_again;
} else
generic_make_request(read_bio);
@@ -1499,7 +1437,8 @@ retry_write:
bio->bi_phys_segments++;
spin_unlock_irq(&conf->device_lock);
}
sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector;
sectors_handled = r10_bio->sector + max_sectors -
bio->bi_iter.bi_sector;
atomic_set(&r10_bio->remaining, 1);
bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
@@ -1510,11 +1449,11 @@ retry_write:
if (r10_bio->devs[i].bio) {
struct md_rdev *rdev = conf->mirrors[d].rdev;
mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
bio_trim(mbio, r10_bio->sector - bio->bi_sector,
bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
max_sectors);
r10_bio->devs[i].bio = mbio;
mbio->bi_sector = (r10_bio->devs[i].addr+
mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
choose_data_offset(r10_bio,
rdev));
mbio->bi_bdev = rdev->bdev;
@@ -1553,11 +1492,11 @@ retry_write:
rdev = conf->mirrors[d].rdev;
}
mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
bio_trim(mbio, r10_bio->sector - bio->bi_sector,
bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
max_sectors);
r10_bio->devs[i].repl_bio = mbio;
mbio->bi_sector = (r10_bio->devs[i].addr +
mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
choose_data_offset(
r10_bio, rdev));
mbio->bi_bdev = rdev->bdev;
@@ -1591,11 +1530,57 @@ retry_write:
r10_bio->sectors = bio_sectors(bio) - sectors_handled;
r10_bio->mddev = mddev;
r10_bio->sector = bio->bi_sector + sectors_handled;
r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
r10_bio->state = 0;
goto retry_write;
}
one_write_done(r10_bio);
}
static void make_request(struct mddev *mddev, struct bio *bio)
{
struct r10conf *conf = mddev->private;
sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
int chunk_sects = chunk_mask + 1;
struct bio *split;
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
md_flush_request(mddev, bio);
return;
}
md_write_start(mddev, bio);
/*
* Register the new request and wait if the reconstruction
* thread has put up a bar for new requests.
* Continue immediately if no resync is active currently.
*/
wait_barrier(conf);
do {
/*
* If this request crosses a chunk boundary, we need to split
* it.
*/
if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
bio_sectors(bio) > chunk_sects
&& (conf->geo.near_copies < conf->geo.raid_disks
|| conf->prev.near_copies <
conf->prev.raid_disks))) {
split = bio_split(bio, chunk_sects -
(bio->bi_iter.bi_sector &
(chunk_sects - 1)),
GFP_NOIO, fs_bio_set);
bio_chain(split, bio);
} else {
split = bio;
}
__make_request(mddev, split);
} while (split != bio);
/* In case raid10d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
@@ -2124,10 +2109,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
bio_reset(tbio);
tbio->bi_vcnt = vcnt;
tbio->bi_size = r10_bio->sectors << 9;
tbio->bi_iter.bi_size = r10_bio->sectors << 9;
tbio->bi_rw = WRITE;
tbio->bi_private = r10_bio;
tbio->bi_sector = r10_bio->devs[i].addr;
tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
for (j=0; j < vcnt ; j++) {
tbio->bi_io_vec[j].bv_offset = 0;
@@ -2144,7 +2129,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
atomic_inc(&r10_bio->remaining);
md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
tbio->bi_sector += conf->mirrors[d].rdev->data_offset;
tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
generic_make_request(tbio);
}
@@ -2614,8 +2599,8 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
sectors = sect_to_write;
/* Write at 'sector' for 'sectors' */
wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
bio_trim(wbio, sector - bio->bi_sector, sectors);
wbio->bi_sector = (r10_bio->devs[i].addr+
bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
choose_data_offset(r10_bio, rdev) +
(sector - r10_bio->sector));
wbio->bi_bdev = rdev->bdev;
@@ -2687,10 +2672,10 @@ read_more:
(unsigned long long)r10_bio->sector);
bio = bio_clone_mddev(r10_bio->master_bio,
GFP_NOIO, mddev);
bio_trim(bio, r10_bio->sector - bio->bi_sector, max_sectors);
bio_trim(bio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors);
r10_bio->devs[slot].bio = bio;
r10_bio->devs[slot].rdev = rdev;
bio->bi_sector = r10_bio->devs[slot].addr
bio->bi_iter.bi_sector = r10_bio->devs[slot].addr
+ choose_data_offset(r10_bio, rdev);
bio->bi_bdev = rdev->bdev;
bio->bi_rw = READ | do_sync;
@@ -2701,7 +2686,7 @@ read_more:
struct bio *mbio = r10_bio->master_bio;
int sectors_handled =
r10_bio->sector + max_sectors
- mbio->bi_sector;
- mbio->bi_iter.bi_sector;
r10_bio->sectors = max_sectors;
spin_lock_irq(&conf->device_lock);
if (mbio->bi_phys_segments == 0)
@@ -2719,7 +2704,7 @@ read_more:
set_bit(R10BIO_ReadError,
&r10_bio->state);
r10_bio->mddev = mddev;
r10_bio->sector = mbio->bi_sector
r10_bio->sector = mbio->bi_iter.bi_sector
+ sectors_handled;
goto read_more;
@@ -3157,7 +3142,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_end_io = end_sync_read;
bio->bi_rw = READ;
from_addr = r10_bio->devs[j].addr;
bio->bi_sector = from_addr + rdev->data_offset;
bio->bi_iter.bi_sector = from_addr +
rdev->data_offset;
bio->bi_bdev = rdev->bdev;
atomic_inc(&rdev->nr_pending);
/* and we write to 'i' (if not in_sync) */
@@ -3181,7 +3167,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write;
bio->bi_rw = WRITE;
bio->bi_sector = to_addr
bio->bi_iter.bi_sector = to_addr
+ rdev->data_offset;
bio->bi_bdev = rdev->bdev;
atomic_inc(&r10_bio->remaining);
@@ -3210,7 +3196,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write;
bio->bi_rw = WRITE;
bio->bi_sector = to_addr + rdev->data_offset;
bio->bi_iter.bi_sector = to_addr +
rdev->data_offset;
bio->bi_bdev = rdev->bdev;
atomic_inc(&r10_bio->remaining);
break;
@@ -3328,7 +3315,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_read;
bio->bi_rw = READ;
bio->bi_sector = sector +
bio->bi_iter.bi_sector = sector +
conf->mirrors[d].rdev->data_offset;
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
count++;
@@ -3350,7 +3337,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write;
bio->bi_rw = WRITE;
bio->bi_sector = sector +
bio->bi_iter.bi_sector = sector +
conf->mirrors[d].replacement->data_offset;
bio->bi_bdev = conf->mirrors[d].replacement->bdev;
count++;
@@ -3397,7 +3384,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
bio2 = bio2->bi_next) {
/* remove last page from this bio */
bio2->bi_vcnt--;
bio2->bi_size -= len;
bio2->bi_iter.bi_size -= len;
bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
}
goto bio_full;
@@ -4418,7 +4405,7 @@ read_more:
read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev);
read_bio->bi_bdev = rdev->bdev;
read_bio->bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
+ rdev->data_offset);
read_bio->bi_private = r10_bio;
read_bio->bi_end_io = end_sync_read;
@@ -4426,7 +4413,7 @@ read_more:
read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
read_bio->bi_flags |= 1 << BIO_UPTODATE;
read_bio->bi_vcnt = 0;
read_bio->bi_size = 0;
read_bio->bi_iter.bi_size = 0;
r10_bio->master_bio = read_bio;
r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
@@ -4452,7 +4439,8 @@ read_more:
bio_reset(b);
b->bi_bdev = rdev2->bdev;
b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset;
b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
rdev2->new_data_offset;
b->bi_private = r10_bio;
b->bi_end_io = end_reshape_write;
b->bi_rw = WRITE;
@@ -4479,7 +4467,7 @@ read_more:
bio2 = bio2->bi_next) {
/* Remove last page from this bio */
bio2->bi_vcnt--;
bio2->bi_size -= len;
bio2->bi_iter.bi_size -= len;
bio2->bi_flags &= ~(1<<BIO_SEG_VALID);
}
goto bio_full;
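Two of the fixups in this pull ("btrfs: fix missing increment of bi_remaining" and "dm cache: increment bi_remaining when bi_end_io is restored") address the completion accounting that generic bio chaining introduces: in 3.14, bio_endio() decrements bio->bi_remaining and only invokes ->bi_end_io once the count reaches zero, so a driver that temporarily took over ->bi_end_io must take that reference back before completing the bio a second time. A hedged sketch of the pattern follows; struct my_hook and my_intercept_endio are hypothetical names, and this is illustrative only, not the actual btrfs or dm-cache code:

#include <linux/bio.h>
#include <linux/slab.h>

/* Hypothetical per-bio context stashed while the driver owns completion. */
struct my_hook {
	bio_end_io_t *orig_end_io;
	void *orig_private;
};

/*
 * Runs as the bio's ->bi_end_io after the driver pointed it here. The
 * bio_endio() call that invoked us already consumed one bi_remaining
 * reference, so re-arm the count before handing completion back to the
 * original owner; otherwise the second bio_endio() runs with an
 * already-exhausted bi_remaining count.
 */
static void my_intercept_endio(struct bio *bio, int error)
{
	struct my_hook *h = bio->bi_private;

	/* ... driver-specific bookkeeping would go here ... */

	bio->bi_end_io = h->orig_end_io;
	bio->bi_private = h->orig_private;
	kfree(h);

	atomic_inc(&bio->bi_remaining);	/* the gist of both fixes */
	bio_endio(bio, error);		/* now runs the original completion */
}

The setup side (allocating the my_hook, saving the original fields, and pointing bio->bi_end_io at my_intercept_endio before resubmitting) is omitted; the point is only the atomic_inc() before ending the bio again.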