block: improve handling of the magic discard payload

Instead of allocating a single unused biovec for discard requests, send
them down without any payload.  Instead we allow the driver to add a
"special" payload using a biovec embedded into struct request (unioned
over other fields never used while in the driver), and overloading
the number of segments for this case.

This has a couple of advantages:

 - we don't have to allocate the bio_vec
 - the amount of special casing for discard requests in the block
   layer is significantly reduced
 - using this same scheme for other request types is trivial,
   which will be important for implementing the new WRITE_ZEROES
   op on devices where it actually requires a payload (e.g. SCSI)
 - we can get rid of playing games with the request length, as
   we'll never touch it and completions will work just fine
 - it will allow us to support ranged discard operations in the
   future by merging non-contiguous discard bios into a single
   request
 - last but not least it removes a lot of code

This patch is the common base for my WIP series for ranges discards and to
remove discard_zeroes_data in favor of always using REQ_OP_WRITE_ZEROES,
so it would be good to get it in quickly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
Christoph Hellwig
2016-12-08 15:20:32 -07:00
committed by Jens Axboe
parent be07e14f96
commit f9d03f96b9
13 changed files with 76 additions and 138 deletions

View File

@@ -241,18 +241,13 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
if (!bio)
return 0;
/*
* This should probably be returning 0, but blk_add_request_payload()
* (Christoph!!!!)
*/
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
case REQ_OP_WRITE_SAME:
case REQ_OP_WRITE_ZEROES:
return 0;
case REQ_OP_WRITE_SAME:
return 1;
default:
break;
}
fbio = bio;
@@ -410,39 +405,21 @@ new_segment:
*bvprv = *bvec;
}
static inline int __blk_bvec_map_sg(struct request_queue *q, struct bio_vec bv,
struct scatterlist *sglist, struct scatterlist **sg)
{
*sg = sglist;
sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
return 1;
}
static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
struct scatterlist *sglist,
struct scatterlist **sg)
{
struct bio_vec bvec, bvprv = { NULL };
struct bvec_iter iter;
int nsegs, cluster;
nsegs = 0;
cluster = blk_queue_cluster(q);
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
case REQ_OP_WRITE_ZEROES:
/*
* This is a hack - drivers should be neither modifying the
* biovec, nor relying on bi_vcnt - but because of
* blk_add_request_payload(), a discard bio may or may not have
* a payload we need to set up here (thank you Christoph) and
* bi_vcnt is really the only way of telling if we need to.
*/
if (!bio->bi_vcnt)
return 0;
/* Fall through */
case REQ_OP_WRITE_SAME:
*sg = sglist;
bvec = bio_iovec(bio);
sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
return 1;
default:
break;
}
int cluster = blk_queue_cluster(q), nsegs = 0;
for_each_bio(bio)
bio_for_each_segment(bvec, bio, iter)
@@ -462,7 +439,11 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
struct scatterlist *sg = NULL;
int nsegs = 0;
if (rq->bio)
if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
nsegs = __blk_bvec_map_sg(q, rq->special_vec, sglist, &sg);
else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME)
nsegs = __blk_bvec_map_sg(q, bio_iovec(rq->bio), sglist, &sg);
else if (rq->bio)
nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
if (unlikely(rq->rq_flags & RQF_COPY_USER) &&
@@ -495,7 +476,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
* Something must have been wrong if the figured number of
* segment is bigger than number of req's physical segments
*/
WARN_ON(nsegs > rq->nr_phys_segments);
WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
return nsegs;
}