block: improve handling of the magic discard payload

Instead of allocating a single unused biovec for discard requests, send
them down without any payload.  Instead we allow the driver to add a
"special" payload using a biovec embedded into struct request (unioned
over other fields never used while in the driver), and overloading
the number of segments for this case.

This has a couple of advantages:

 - we don't have to allocate the bio_vec
 - the amount of special casing for discard requests in the block
   layer is significantly reduced
 - using this same scheme for other request types is trivial,
   which will be important for implementing the new WRITE_ZEROES
   op on devices where it actually requires a payload (e.g. SCSI)
 - we can get rid of playing games with the request length, as
   we'll never touch it and completions will work just fine
 - it will allow us to support ranged discard operations in the
   future by merging non-contiguous discard bios into a single
   request
 - last but not least it removes a lot of code

This patch is the common base for my WIP series for ranges discards and to
remove discard_zeroes_data in favor of always using REQ_OP_WRITE_ZEROES,
so it would be good to get it in quickly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
Christoph Hellwig
2016-12-08 15:20:32 -07:00
committed by Jens Axboe
父節點 be07e14f96
當前提交 f9d03f96b9
共有 13 個文件被更改,包括 76 次插入138 次删除

查看文件

@@ -1007,8 +1007,8 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
/*
* If sg table allocation fails, requeue request later.
*/
if (unlikely(sg_alloc_table_chained(&sdb->table, req->nr_phys_segments,
sdb->table.sgl)))
if (unlikely(sg_alloc_table_chained(&sdb->table,
blk_rq_nr_phys_segments(req), sdb->table.sgl)))
return BLKPREP_DEFER;
/*
@@ -1040,7 +1040,7 @@ int scsi_init_io(struct scsi_cmnd *cmd)
bool is_mq = (rq->mq_ctx != NULL);
int error;
BUG_ON(!rq->nr_phys_segments);
BUG_ON(!blk_rq_nr_phys_segments(rq));
error = scsi_init_sgtable(rq, &cmd->sdb);
if (error)

查看文件

@@ -716,7 +716,6 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
sector_t sector = blk_rq_pos(rq);
unsigned int nr_sectors = blk_rq_sectors(rq);
unsigned int nr_bytes = blk_rq_bytes(rq);
unsigned int len;
int ret;
char *buf;
@@ -772,24 +771,19 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
goto out;
}
rq->completion_data = page;
rq->timeout = SD_TIMEOUT;
cmd->transfersize = len;
cmd->allowed = SD_MAX_RETRIES;
/*
* Initially __data_len is set to the amount of data that needs to be
* transferred to the target. This amount depends on whether WRITE SAME
* or UNMAP is being used. After the scatterlist has been mapped by
* scsi_init_io() we set __data_len to the size of the area to be
* discarded on disk. This allows us to report completion on the full
* amount of blocks described by the request.
*/
blk_add_request_payload(rq, page, 0, len);
ret = scsi_init_io(cmd);
rq->__data_len = nr_bytes;
rq->special_vec.bv_page = page;
rq->special_vec.bv_offset = 0;
rq->special_vec.bv_len = len;
rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
rq->resid_len = len;
ret = scsi_init_io(cmd);
out:
if (ret != BLKPREP_OK)
__free_page(page);
@@ -1182,8 +1176,8 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
{
struct request *rq = SCpnt->request;
if (req_op(rq) == REQ_OP_DISCARD)
__free_page(rq->completion_data);
if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
__free_page(rq->special_vec.bv_page);
if (SCpnt->cmnd != rq->cmd) {
mempool_free(SCpnt->cmnd, sd_cdb_pool);