Merge branch 'for-4.12/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:

 - Add BFQ IO scheduler under the new blk-mq scheduling framework. BFQ
   was initially a fork of CFQ, but subsequently changed to implement
   fairness based on B-WF2Q+, a modified variant of WF2Q. BFQ is meant
   to be used on desktop type single drives, providing good fairness.
   From Paolo.

 - Add Kyber IO scheduler. This is a full multiqueue aware scheduler,
   using a scalable token based algorithm that throttles IO based on
   live completion IO stats, similarly to blk-wbt. From Omar.

 - A series from Jan, moving users to separately allocated backing
   devices. This continues the work of separating backing device life
   times, solving various problems with hot removal.

 - A series of updates for lightnvm, mostly from Javier. Includes a
   'pblk' target that exposes an open channel SSD as a physical block
   device.

 - A series of fixes and improvements for nbd from Josef.

 - A series from Omar, removing queue sharing between devices on mostly
   legacy drivers. This helps us clean up other bits, if we know that a
   queue only has a single device backing it. This has been overdue for
   more than a decade.

 - Fixes for blk-stats, and improvements to unify the stats and user
   windows. This both improves blk-wbt, and enables other users to
   register a need to receive IO stats for a device. From Omar.

 - blk-throttle improvements from Shaohua. This provides a scalable
   framework for implementing scalable prioritization - particularly
   for blk-mq, but applicable to any type of block device. The
   interface is marked experimental for now.

 - Bucketized IO stats for IO polling from Stephen Bates. This improves
   the efficiency of polled workloads in the presence of mixed block
   size IO.

 - A few fixes for opal, from Scott.

 - A few pulls for NVMe, including a lot of fixes for NVMe-over-fabrics.
   From a variety of folks, mostly Sagi and James Smart.

 - A series from Bart, improving our exposed info and capabilities from
   the blk-mq debugfs support.

 - A series from Christoph, cleaning up how we handle WRITE_ZEROES.

 - A series from Christoph, cleaning up the block layer handling of how
   we track errors in a request. On top of being a nice cleanup, it
   also shrinks the size of struct request a bit.

 - Removal of mg_disk and hd (sorry Linus) by Christoph. The former was
   never used by platforms, and the latter has outlived its usefulness.

 - Various little bug fixes and cleanups from a wide variety of folks.
* 'for-4.12/block' of git://git.kernel.dk/linux-block: (329 commits)
  block: hide badblocks attribute by default
  blk-mq: unify hctx delay_work and run_work
  block: add kblock_mod_delayed_work_on()
  blk-mq: unify hctx delayed_run_work and run_work
  nbd: fix use after free on module unload
  MAINTAINERS: bfq: Add Paolo as maintainer for the BFQ I/O scheduler
  blk-mq-sched: alloate reserved tags out of normal pool
  mtip32xx: use runtime tag to initialize command header
  scsi: Implement blk_mq_ops.show_rq()
  blk-mq: Add blk_mq_ops.show_rq()
  blk-mq: Show operation, cmd_flags and rq_flags names
  blk-mq: Make blk_flags_show() callers append a newline character
  blk-mq: Move the "state" debugfs attribute one level down
  blk-mq: Unregister debugfs attributes earlier
  blk-mq: Only unregister hctxs for which registration succeeded
  blk-mq-debugfs: Rename functions for registering and unregistering the mq directory
  blk-mq: Let blk_mq_debugfs_register() look up the queue name
  blk-mq: Register <dev>/queue/mq after having registered <dev>/queue
  ide-pm: always pass 0 error to ide_complete_rq in ide_do_devset
  ide-pm: always pass 0 error to __blk_end_request_all
  ...
@@ -418,6 +418,46 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(provisioning_mode);
 
+static const char *zeroing_mode[] = {
+	[SD_ZERO_WRITE]		= "write",
+	[SD_ZERO_WS]		= "writesame",
+	[SD_ZERO_WS16_UNMAP]	= "writesame_16_unmap",
+	[SD_ZERO_WS10_UNMAP]	= "writesame_10_unmap",
+};
+
+static ssize_t
+zeroing_mode_show(struct device *dev, struct device_attribute *attr,
+		  char *buf)
+{
+	struct scsi_disk *sdkp = to_scsi_disk(dev);
+
+	return snprintf(buf, 20, "%s\n", zeroing_mode[sdkp->zeroing_mode]);
+}
+
+static ssize_t
+zeroing_mode_store(struct device *dev, struct device_attribute *attr,
+		   const char *buf, size_t count)
+{
+	struct scsi_disk *sdkp = to_scsi_disk(dev);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (!strncmp(buf, zeroing_mode[SD_ZERO_WRITE], 20))
+		sdkp->zeroing_mode = SD_ZERO_WRITE;
+	else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS], 20))
+		sdkp->zeroing_mode = SD_ZERO_WS;
+	else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS16_UNMAP], 20))
+		sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP;
+	else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS10_UNMAP], 20))
+		sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP;
+	else
+		return -EINVAL;
+
+	return count;
+}
+static DEVICE_ATTR_RW(zeroing_mode);
+
 static ssize_t
 max_medium_access_timeouts_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
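The new zeroing_mode attribute sits next to provisioning_mode in sysfs and accepts the four strings from the table above. A minimal userspace sketch of toggling it; the /sys/class/scsi_disk path and the 0:0:0:0 address are assumptions, substitute your disk's H:C:T:L:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* Hypothetical device address - adjust for your system. */
		const char *attr = "/sys/class/scsi_disk/0:0:0:0/zeroing_mode";
		const char *mode = "writesame_16_unmap";
		int fd = open(attr, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* zeroing_mode_store() compares at most 20 bytes via strncmp(). */
		if (write(fd, mode, strlen(mode)) < 0)
			perror("write");
		close(fd);
		return 0;
	}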
@@ -496,6 +536,7 @@ static struct attribute *sd_disk_attrs[] = {
 	&dev_attr_app_tag_own.attr,
 	&dev_attr_thin_provisioning.attr,
 	&dev_attr_provisioning_mode.attr,
+	&dev_attr_zeroing_mode.attr,
 	&dev_attr_max_write_same_blocks.attr,
 	&dev_attr_max_medium_access_timeouts.attr,
 	NULL,
@@ -644,26 +685,11 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 	unsigned int logical_block_size = sdkp->device->sector_size;
 	unsigned int max_blocks = 0;
 
-	q->limits.discard_zeroes_data = 0;
-
-	/*
-	 * When LBPRZ is reported, discard alignment and granularity
-	 * must be fixed to the logical block size. Otherwise the block
-	 * layer will drop misaligned portions of the request which can
-	 * lead to data corruption. If LBPRZ is not set, we honor the
-	 * device preference.
-	 */
-	if (sdkp->lbprz) {
-		q->limits.discard_alignment = 0;
-		q->limits.discard_granularity = logical_block_size;
-	} else {
-		q->limits.discard_alignment = sdkp->unmap_alignment *
-			logical_block_size;
-		q->limits.discard_granularity =
-			max(sdkp->physical_block_size,
-			    sdkp->unmap_granularity * logical_block_size);
-	}
-
+	q->limits.discard_alignment =
+		sdkp->unmap_alignment * logical_block_size;
+	q->limits.discard_granularity =
+		max(sdkp->physical_block_size,
+		    sdkp->unmap_granularity * logical_block_size);
 	sdkp->provisioning_mode = mode;
 
 	switch (mode) {
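With the LBPRZ special-casing gone, the queue limits now always follow the device's reported UNMAP preferences. A worked example of the arithmetic, under assumed Block Limits VPD values:

	#include <stdio.h>

	#define max(a, b) ((a) > (b) ? (a) : (b))

	int main(void)
	{
		/* Assumed values from a hypothetical Block Limits VPD page. */
		unsigned int logical_block_size = 512;		/* bytes */
		unsigned int physical_block_size = 4096;	/* bytes */
		unsigned int unmap_alignment = 0;	/* in logical blocks */
		unsigned int unmap_granularity = 8;	/* in logical blocks */

		unsigned int discard_alignment =
			unmap_alignment * logical_block_size;
		unsigned int discard_granularity =
			max(physical_block_size,
			    unmap_granularity * logical_block_size);

		/* Prints 0 and 4096: discards align to 4k for this device. */
		printf("alignment=%u granularity=%u\n",
		       discard_alignment, discard_granularity);
		return 0;
	}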
@@ -681,19 +707,16 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 	case SD_LBP_WS16:
 		max_blocks = min_not_zero(sdkp->max_ws_blocks,
 					  (u32)SD_MAX_WS16_BLOCKS);
-		q->limits.discard_zeroes_data = sdkp->lbprz;
 		break;
 
 	case SD_LBP_WS10:
 		max_blocks = min_not_zero(sdkp->max_ws_blocks,
 					  (u32)SD_MAX_WS10_BLOCKS);
-		q->limits.discard_zeroes_data = sdkp->lbprz;
 		break;
 
 	case SD_LBP_ZERO:
 		max_blocks = min_not_zero(sdkp->max_ws_blocks,
 					  (u32)SD_MAX_WS10_BLOCKS);
-		q->limits.discard_zeroes_data = 1;
 		break;
 	}
 
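The min_not_zero() cap used above treats a zero device-reported limit as "no limit" and falls back to the command's own ceiling. A standalone sketch; the macro is simplified from the kernel's include/linux/kernel.h version, and the 0xffff ceiling mirrors WRITE SAME(10)'s 16-bit block-count field:

	#include <stdio.h>

	/* Simplified from the kernel's min_not_zero(): zero means "no limit". */
	#define min_not_zero(x, y) \
		((x) == 0 ? (y) : ((y) == 0 ? (x) : ((x) < (y) ? (x) : (y))))

	int main(void)
	{
		unsigned int sd_max_ws10_blocks = 0xffff; /* 16-bit count field */
		unsigned int max_ws_blocks = 0;	/* device reported no limit */

		/* Falls back to the command's ceiling: prints 65535. */
		printf("%u\n", min_not_zero(max_ws_blocks, sd_max_ws10_blocks));
		return 0;
	}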
@@ -701,93 +724,122 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 }
 
-/**
- * sd_setup_discard_cmnd - unmap blocks on thinly provisioned device
- * @sdp: scsi device to operate on
- * @rq: Request to prepare
- *
- * Will issue either UNMAP or WRITE SAME(16) depending on preference
- * indicated by target device.
- **/
-static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
+static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
+{
+	struct scsi_device *sdp = cmd->device;
+	struct request *rq = cmd->request;
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	unsigned int data_len = 24;
+	char *buf;
+
+	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	if (!rq->special_vec.bv_page)
+		return BLKPREP_DEFER;
+	rq->special_vec.bv_offset = 0;
+	rq->special_vec.bv_len = data_len;
+	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	cmd->cmd_len = 10;
+	cmd->cmnd[0] = UNMAP;
+	cmd->cmnd[8] = 24;
+
+	buf = page_address(rq->special_vec.bv_page);
+	put_unaligned_be16(6 + 16, &buf[0]);
+	put_unaligned_be16(16, &buf[2]);
+	put_unaligned_be64(sector, &buf[8]);
+	put_unaligned_be32(nr_sectors, &buf[16]);
+
+	cmd->allowed = SD_MAX_RETRIES;
+	cmd->transfersize = data_len;
+	rq->timeout = SD_TIMEOUT;
+	scsi_req(rq)->resid_len = data_len;
+
+	return scsi_init_io(cmd);
+}
+
+static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap)
+{
+	struct scsi_device *sdp = cmd->device;
+	struct request *rq = cmd->request;
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 data_len = sdp->sector_size;
+
+	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	if (!rq->special_vec.bv_page)
+		return BLKPREP_DEFER;
+	rq->special_vec.bv_offset = 0;
+	rq->special_vec.bv_len = data_len;
+	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	cmd->cmd_len = 16;
+	cmd->cmnd[0] = WRITE_SAME_16;
+	if (unmap)
+		cmd->cmnd[1] = 0x8; /* UNMAP */
+	put_unaligned_be64(sector, &cmd->cmnd[2]);
+	put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
+
+	cmd->allowed = SD_MAX_RETRIES;
+	cmd->transfersize = data_len;
+	rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
+	scsi_req(rq)->resid_len = data_len;
+
+	return scsi_init_io(cmd);
+}
+
+static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
+{
+	struct scsi_device *sdp = cmd->device;
+	struct request *rq = cmd->request;
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 data_len = sdp->sector_size;
+
+	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	if (!rq->special_vec.bv_page)
+		return BLKPREP_DEFER;
+	rq->special_vec.bv_offset = 0;
+	rq->special_vec.bv_len = data_len;
+	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	cmd->cmd_len = 10;
+	cmd->cmnd[0] = WRITE_SAME;
+	if (unmap)
+		cmd->cmnd[1] = 0x8; /* UNMAP */
+	put_unaligned_be32(sector, &cmd->cmnd[2]);
+	put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
+
+	cmd->allowed = SD_MAX_RETRIES;
+	cmd->transfersize = data_len;
+	rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
+	scsi_req(rq)->resid_len = data_len;
+
+	return scsi_init_io(cmd);
+}
+
+static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
 {
 	struct request *rq = cmd->request;
 	struct scsi_device *sdp = cmd->device;
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
-	sector_t sector = blk_rq_pos(rq);
-	unsigned int nr_sectors = blk_rq_sectors(rq);
-	unsigned int len;
-	int ret;
-	char *buf;
-	struct page *page;
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
 
-	sector >>= ilog2(sdp->sector_size) - 9;
-	nr_sectors >>= ilog2(sdp->sector_size) - 9;
-
-	page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
-	if (!page)
-		return BLKPREP_DEFER;
-
-	switch (sdkp->provisioning_mode) {
-	case SD_LBP_UNMAP:
-		buf = page_address(page);
-
-		cmd->cmd_len = 10;
-		cmd->cmnd[0] = UNMAP;
-		cmd->cmnd[8] = 24;
-
-		put_unaligned_be16(6 + 16, &buf[0]);
-		put_unaligned_be16(16, &buf[2]);
-		put_unaligned_be64(sector, &buf[8]);
-		put_unaligned_be32(nr_sectors, &buf[16]);
-
-		len = 24;
-		break;
-
-	case SD_LBP_WS16:
-		cmd->cmd_len = 16;
-		cmd->cmnd[0] = WRITE_SAME_16;
-		cmd->cmnd[1] = 0x8; /* UNMAP */
-		put_unaligned_be64(sector, &cmd->cmnd[2]);
-		put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
-
-		len = sdkp->device->sector_size;
-		break;
-
-	case SD_LBP_WS10:
-	case SD_LBP_ZERO:
-		cmd->cmd_len = 10;
-		cmd->cmnd[0] = WRITE_SAME;
-		if (sdkp->provisioning_mode == SD_LBP_WS10)
-			cmd->cmnd[1] = 0x8; /* UNMAP */
-		put_unaligned_be32(sector, &cmd->cmnd[2]);
-		put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
-
-		len = sdkp->device->sector_size;
-		break;
-
-	default:
-		ret = BLKPREP_INVALID;
-		goto out;
+	if (!(rq->cmd_flags & REQ_NOUNMAP)) {
+		switch (sdkp->zeroing_mode) {
+		case SD_ZERO_WS16_UNMAP:
+			return sd_setup_write_same16_cmnd(cmd, true);
+		case SD_ZERO_WS10_UNMAP:
+			return sd_setup_write_same10_cmnd(cmd, true);
+		}
 	}
 
-	rq->timeout = SD_TIMEOUT;
-
-	cmd->transfersize = len;
-	cmd->allowed = SD_MAX_RETRIES;
-
-	rq->special_vec.bv_page = page;
-	rq->special_vec.bv_offset = 0;
-	rq->special_vec.bv_len = len;
-
-	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
-	scsi_req(rq)->resid_len = len;
-
-	ret = scsi_init_io(cmd);
-out:
-	if (ret != BLKPREP_OK)
-		__free_page(page);
-	return ret;
+	if (sdp->no_write_same)
+		return BLKPREP_INVALID;
+	if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff)
+		return sd_setup_write_same16_cmnd(cmd, false);
+	return sd_setup_write_same10_cmnd(cmd, false);
 }
 
 static void sd_config_write_same(struct scsi_disk *sdkp)
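For reference, the 24-byte payload that sd_setup_unmap_cmnd() builds is a single-descriptor UNMAP parameter list. A hedged userspace sketch of its layout; the struct and field names are mine, offsets per SBC-3, all fields big-endian on the wire:

	#include <stdint.h>

	/* Layout of the buffer filled by the put_unaligned_be*() calls
	 * above: a 2-byte list length (6 + 16), a 2-byte descriptor
	 * length (16), 4 reserved bytes, then one 16-byte block
	 * descriptor. */
	struct unmap_param_list {
		uint16_t data_len;	/* bytes following this field: 6 + 16 */
		uint16_t block_desc_len;	/* one descriptor: 16 */
		uint8_t reserved[4];
		uint64_t lba;		/* first logical block to unmap */
		uint32_t num_blocks;	/* number of logical blocks */
		uint8_t desc_reserved[4];
	} __attribute__((packed));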
@@ -816,9 +868,20 @@ static void sd_config_write_same(struct scsi_disk *sdkp)
 		sdkp->max_ws_blocks = 0;
 	}
 
+	if (sdkp->lbprz && sdkp->lbpws)
+		sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP;
+	else if (sdkp->lbprz && sdkp->lbpws10)
+		sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP;
+	else if (sdkp->max_ws_blocks)
+		sdkp->zeroing_mode = SD_ZERO_WS;
+	else
+		sdkp->zeroing_mode = SD_ZERO_WRITE;
+
 out:
 	blk_queue_max_write_same_sectors(q, sdkp->max_ws_blocks *
 					 (logical_block_size >> 9));
+	blk_queue_max_write_zeroes_sectors(q, sdkp->max_ws_blocks *
+					   (logical_block_size >> 9));
 }
 
 /**
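The default ladder prefers the unmapping WRITE SAME variants only when the device guarantees zeroed data after unmap (LBPRZ). A standalone restatement for tracing how hypothetical VPD bits map to a mode; the helper name is mine:

	#include <stdbool.h>

	enum zeroing_mode {
		SD_ZERO_WRITE, SD_ZERO_WS, SD_ZERO_WS16_UNMAP, SD_ZERO_WS10_UNMAP,
	};

	static enum zeroing_mode pick_zeroing_mode(bool lbprz, bool lbpws,
						   bool lbpws10,
						   unsigned int max_ws_blocks)
	{
		if (lbprz && lbpws)	/* unmapped blocks read back as zeroes */
			return SD_ZERO_WS16_UNMAP;
		if (lbprz && lbpws10)
			return SD_ZERO_WS10_UNMAP;
		if (max_ws_blocks)	/* plain WRITE SAME of a zeroed block */
			return SD_ZERO_WS;
		return SD_ZERO_WRITE;	/* fall back to regular writes */
	}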
@@ -1155,7 +1218,20 @@ static int sd_init_command(struct scsi_cmnd *cmd)
 
 	switch (req_op(rq)) {
 	case REQ_OP_DISCARD:
-		return sd_setup_discard_cmnd(cmd);
+		switch (scsi_disk(rq->rq_disk)->provisioning_mode) {
+		case SD_LBP_UNMAP:
+			return sd_setup_unmap_cmnd(cmd);
+		case SD_LBP_WS16:
+			return sd_setup_write_same16_cmnd(cmd, true);
+		case SD_LBP_WS10:
+			return sd_setup_write_same10_cmnd(cmd, true);
+		case SD_LBP_ZERO:
+			return sd_setup_write_same10_cmnd(cmd, false);
+		default:
+			return BLKPREP_INVALID;
+		}
+	case REQ_OP_WRITE_ZEROES:
+		return sd_setup_write_zeroes_cmnd(cmd);
 	case REQ_OP_WRITE_SAME:
 		return sd_setup_write_same_cmnd(cmd);
 	case REQ_OP_FLUSH:
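In-kernel callers reach the new REQ_OP_WRITE_ZEROES branch through blkdev_issue_zeroout(), which grew a flags argument in this series. A minimal sketch; the wrapper function is hypothetical:

	#include <linux/blkdev.h>

	/* Zero the first MiB of a block device. With BLKDEV_ZERO_NOUNMAP
	 * the resulting bios carry REQ_NOUNMAP, so the check in
	 * sd_setup_write_zeroes_cmnd() above skips the unmapping
	 * WRITE SAME variants. */
	static int zero_first_mib(struct block_device *bdev)
	{
		sector_t nr_sects = (1024 * 1024) >> 9;

		return blkdev_issue_zeroout(bdev, 0, nr_sects, GFP_KERNEL,
					    BLKDEV_ZERO_NOUNMAP);
	}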
@@ -1795,6 +1871,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 
 	switch (req_op(req)) {
 	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
 	case REQ_OP_WRITE_SAME:
 	case REQ_OP_ZONE_RESET:
 		if (!result) {
@@ -2768,7 +2845,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
 			sd_config_discard(sdkp, SD_LBP_WS16);
 
 	} else {	/* LBP VPD page tells us what to use */
-		if (sdkp->lbpu && sdkp->max_unmap_blocks && !sdkp->lbprz)
+		if (sdkp->lbpu && sdkp->max_unmap_blocks)
 			sd_config_discard(sdkp, SD_LBP_UNMAP);
 		else if (sdkp->lbpws)
 			sd_config_discard(sdkp, SD_LBP_WS16);