Merge branch 'for-5.9/drivers' into for-5.9/block-merge

* for-5.9/drivers: (38 commits)
  block: add max_active_zones to blk-sysfs
  block: add max_open_zones to blk-sysfs
  s390/dasd: Use struct_size() helper
  s390/dasd: fix inability to use DASD with DIAG driver
  md-cluster: fix wild pointer of unlock_all_bitmaps()
  md/raid5-cache: clear MD_SB_CHANGE_PENDING before flushing stripes
  md: fix deadlock causing by sysfs_notify
  md: improve io stats accounting
  md: raid0/linear: fix dereference before null check on pointer mddev
  rsxx: switch from 'pci_free_consistent()' to 'dma_free_coherent()'
  nvme: remove ns->disk checks
  nvme-pci: use standard block status symbolic names
  nvme-pci: use the consistent return type of nvme_pci_iod_alloc_size()
  nvme-pci: add a blank line after declarations
  nvme-pci: fix some comments issues
  nvme-pci: remove redundant segment validation
  nvme: document quirked Intel models
  nvme: expose reconnect_delay and ctrl_loss_tmo via sysfs
  nvme: support for zoned namespaces
  nvme: support for multiple Command Sets Supported and Effects log pages
  ...
Jens Axboe
2020-07-20 15:38:27 -06:00
39 changed files with 1114 additions and 202 deletions


@@ -273,6 +273,24 @@ Description:
device ("host-aware" or "host-managed" zone model). For regular
block devices, the value is always 0.
What: /sys/block/<disk>/queue/max_active_zones
Date: July 2020
Contact: Niklas Cassel <niklas.cassel@wdc.com>
Description:
For zoned block devices (zoned attribute indicating
"host-managed" or "host-aware"), the sum of zones belonging to
any of the zone states: EXPLICIT OPEN, IMPLICIT OPEN or CLOSED,
is limited by this value. If this value is 0, there is no limit.
What: /sys/block/<disk>/queue/max_open_zones
Date: July 2020
Contact: Niklas Cassel <niklas.cassel@wdc.com>
Description:
For zoned block devices (zoned attribute indicating
"host-managed" or "host-aware"), the sum of zones belonging to
any of the zone states: EXPLICIT OPEN or IMPLICIT OPEN,
is limited by this value. If this value is 0, there is no limit.
What: /sys/block/<disk>/queue/chunk_sectors
Date: September 2016
Contact: Hannes Reinecke <hare@suse.com>


@@ -117,6 +117,20 @@ Maximum number of elements in a DMA scatter/gather list with integrity
data that will be submitted by the block layer core to the associated
block driver.
max_active_zones (RO)
---------------------
For zoned block devices (zoned attribute indicating "host-managed" or
"host-aware"), the sum of zones belonging to any of the zone states:
EXPLICIT OPEN, IMPLICIT OPEN or CLOSED, is limited by this value.
If this value is 0, there is no limit.
max_open_zones (RO)
-------------------
For zoned block devices (zoned attribute indicating "host-managed" or
"host-aware"), the sum of zones belonging to any of the zone states:
EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
If this value is 0, there is no limit.
max_sectors_kb (RW)
-------------------
This is the maximum number of kilobytes that the block layer will allow
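As a minimal userspace sketch of how the new max_open_zones and max_active_zones attributes documented above might be consumed (not part of this series; "nvme0n1" is an arbitrary example device name, and a value of 0 is treated as "no limit" as the documentation states):

#include <limits.h>
#include <stdio.h>

/* Read one of the queue/* attributes for a given disk; 0 means no limit. */
static unsigned int read_queue_limit(const char *disk, const char *attr)
{
	char path[PATH_MAX];
	unsigned int val = 0;
	FILE *f;

	snprintf(path, sizeof(path), "/sys/block/%s/queue/%s", disk, attr);
	f = fopen(path, "r");
	if (!f)
		return 0;	/* attribute absent: treat as unlimited */
	if (fscanf(f, "%u", &val) != 1)
		val = 0;
	fclose(f);
	return val;
}

int main(void)
{
	unsigned int max_open = read_queue_limit("nvme0n1", "max_open_zones");
	unsigned int max_active = read_queue_limit("nvme0n1", "max_active_zones");

	printf("max_open_zones: %u, max_active_zones: %u\n", max_open, max_active);
	return 0;
}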


@@ -86,9 +86,10 @@ config BLK_DEV_ZONED
select MQ_IOSCHED_DEADLINE
help
Block layer zoned block device support. This option enables
support for ZAC/ZBC host-managed and host-aware zoned block devices.
support for ZAC/ZBC/ZNS host-managed and host-aware zoned block
devices.
Say yes here if you have a ZAC or ZBC storage device.
Say yes here if you have a ZAC, ZBC, or ZNS storage device.

config BLK_DEV_THROTTLING
bool "Block layer bio throttling support"


@@ -306,6 +306,16 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
return queue_var_show(blk_queue_nr_zones(q), page);
}
static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
{
return queue_var_show(queue_max_open_zones(q), page);
}
static ssize_t queue_max_active_zones_show(struct request_queue *q, char *page)
{
return queue_var_show(queue_max_active_zones(q), page);
}
static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
{
return queue_var_show((blk_queue_nomerges(q) << 1) |
@@ -668,6 +678,16 @@ static struct queue_sysfs_entry queue_nr_zones_entry = {
.show = queue_nr_zones_show,
};
static struct queue_sysfs_entry queue_max_open_zones_entry = {
.attr = {.name = "max_open_zones", .mode = 0444 },
.show = queue_max_open_zones_show,
};
static struct queue_sysfs_entry queue_max_active_zones_entry = {
.attr = {.name = "max_active_zones", .mode = 0444 },
.show = queue_max_active_zones_show,
};
static struct queue_sysfs_entry queue_nomerges_entry = {
.attr = {.name = "nomerges", .mode = 0644 },
.show = queue_nomerges_show,
@@ -766,6 +786,8 @@ static struct attribute *queue_attrs[] = {
&queue_nonrot_entry.attr,
&queue_zoned_entry.attr,
&queue_nr_zones_entry.attr,
&queue_max_open_zones_entry.attr,
&queue_max_active_zones_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
&queue_iostats_entry.attr,
@@ -793,6 +815,11 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
(!q->mq_ops || !q->mq_ops->timeout))
return 0;
if ((attr == &queue_max_open_zones_entry.attr ||
attr == &queue_max_active_zones_entry.attr) &&
!blk_queue_is_zoned(q))
return 0;
return attr->mode;
}


@@ -312,6 +312,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
return ret;
rep.nr_zones = ret;
rep.flags = BLK_ZONE_REP_CAPACITY;
if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
return -EFAULT;
return 0;


@@ -49,6 +49,7 @@ struct nullb_device {
unsigned long completion_nsec; /* time in ns to complete a request */
unsigned long cache_size; /* disk cache size in MB */
unsigned long zone_size; /* zone size in MB if device is zoned */
unsigned long zone_capacity; /* zone capacity in MB if device is zoned */
unsigned int zone_nr_conv; /* number of conventional zones */
unsigned int submit_queues; /* number of submission queues */
unsigned int home_node; /* home node for the device */


@@ -200,6 +200,10 @@ static unsigned long g_zone_size = 256;
module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
static unsigned long g_zone_capacity;
module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size");
static unsigned int g_zone_nr_conv;
module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
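As a worked illustration of the new zone_capacity parameter described above (values below are made up, not defaults from this patch; 1 MB corresponds to 2048 512-byte sectors):

#include <stdio.h>

#define MB_TO_SECTORS(mb)	((unsigned long)(mb) << 11)	/* 512B sectors per MB */

int main(void)
{
	unsigned long zone_size = 256;		/* reported zone length, MB */
	unsigned long zone_capacity = 192;	/* writable portion, MB */

	/* Per the null_blk changes in this series, writes beyond
	 * start + capacity fail, and a zone becomes FULL when its write
	 * pointer reaches start + capacity rather than start + len. */
	printf("zone len: %lu sectors, writable: %lu sectors\n",
	       MB_TO_SECTORS(zone_size), MB_TO_SECTORS(zone_capacity));
	return 0;
}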
@@ -341,6 +345,7 @@ NULLB_DEVICE_ATTR(mbps, uint, NULL);
NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
NULLB_DEVICE_ATTR(zoned, bool, NULL);
NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
@@ -457,6 +462,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_badblocks,
&nullb_device_attr_zoned,
&nullb_device_attr_zone_size,
&nullb_device_attr_zone_capacity,
&nullb_device_attr_zone_nr_conv,
NULL,
};
@@ -510,7 +516,8 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_nr_conv\n");
return snprintf(page, PAGE_SIZE,
"memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -571,6 +578,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->use_per_node_hctx = g_use_per_node_hctx;
dev->zoned = g_zoned;
dev->zone_size = g_zone_size;
dev->zone_capacity = g_zone_capacity;
dev->zone_nr_conv = g_zone_nr_conv;
return dev;
}


@@ -28,6 +28,15 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
return -EINVAL;
}
if (!dev->zone_capacity)
dev->zone_capacity = dev->zone_size;
if (dev->zone_capacity > dev->zone_size) {
pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
dev->zone_capacity, dev->zone_size);
return -EINVAL;
}
dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT;
dev->nr_zones = dev_size >>
(SECTOR_SHIFT + ilog2(dev->zone_size_sects));
@@ -47,6 +56,7 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
zone->start = sector;
zone->len = dev->zone_size_sects;
zone->capacity = zone->len;
zone->wp = zone->start + zone->len;
zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
zone->cond = BLK_ZONE_COND_NOT_WP;
@@ -59,6 +69,7 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
zone->start = zone->wp = sector;
zone->len = dev->zone_size_sects;
zone->capacity = dev->zone_capacity << ZONE_SIZE_SHIFT;
zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
zone->cond = BLK_ZONE_COND_EMPTY;
@@ -185,6 +196,9 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
return BLK_STS_IOERR;
}
if (zone->wp + nr_sectors > zone->start + zone->capacity)
return BLK_STS_IOERR;
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
@@ -193,7 +207,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
return ret;
zone->wp += nr_sectors;
if (zone->wp == zone->start + zone->len)
if (zone->wp == zone->start + zone->capacity)
zone->cond = BLK_ZONE_COND_FULL;
return BLK_STS_OK;
default:


@@ -562,13 +562,15 @@ static int rsxx_eeh_frozen(struct pci_dev *dev)
for (i = 0; i < card->n_targets; i++) {
if (card->ctrl[i].status.buf)
pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8,
card->ctrl[i].status.buf,
card->ctrl[i].status.dma_addr);
dma_free_coherent(&card->dev->dev,
STATUS_BUFFER_SIZE8,
card->ctrl[i].status.buf,
card->ctrl[i].status.dma_addr);
if (card->ctrl[i].cmd.buf)
pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8,
card->ctrl[i].cmd.buf,
card->ctrl[i].cmd.dma_addr);
dma_free_coherent(&card->dev->dev,
COMMAND_BUFFER_SIZE8,
card->ctrl[i].cmd.buf,
card->ctrl[i].cmd.dma_addr);
}
return 0;
@@ -711,15 +713,15 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
failed_hw_buffers_init:
for (i = 0; i < card->n_targets; i++) {
if (card->ctrl[i].status.buf)
pci_free_consistent(card->dev,
dma_free_coherent(&card->dev->dev,
STATUS_BUFFER_SIZE8,
card->ctrl[i].status.buf,
card->ctrl[i].status.dma_addr);
if (card->ctrl[i].cmd.buf)
pci_free_consistent(card->dev,
dma_free_coherent(&card->dev->dev,
COMMAND_BUFFER_SIZE8,
card->ctrl[i].cmd.buf,
card->ctrl[i].cmd.dma_addr);
}
failed_hw_setup:
rsxx_eeh_failure(dev);


@@ -1631,7 +1631,7 @@ void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
s += blocks;
}
bitmap->last_end_sync = jiffies;
sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed);
}
EXPORT_SYMBOL(md_bitmap_cond_end_sync);


@@ -1518,6 +1518,7 @@ static void unlock_all_bitmaps(struct mddev *mddev)
}
}
kfree(cinfo->other_bitmap_lockres);
cinfo->other_bitmap_lockres = NULL;
}
}


@@ -463,12 +463,38 @@ check_suspended:
}
EXPORT_SYMBOL(md_handle_request);
struct md_io {
struct mddev *mddev;
bio_end_io_t *orig_bi_end_io;
void *orig_bi_private;
unsigned long start_time;
};
static void md_end_io(struct bio *bio)
{
struct md_io *md_io = bio->bi_private;
struct mddev *mddev = md_io->mddev;
disk_end_io_acct(mddev->gendisk, bio_op(bio), md_io->start_time);
bio->bi_end_io = md_io->orig_bi_end_io;
bio->bi_private = md_io->orig_bi_private;
mempool_free(md_io, &mddev->md_io_pool);
if (bio->bi_end_io)
bio->bi_end_io(bio);
}
static blk_qc_t md_submit_bio(struct bio *bio)
{
const int rw = bio_data_dir(bio);
const int sgrp = op_stat_group(bio_op(bio));
struct mddev *mddev = bio->bi_disk->private_data;
unsigned int sectors;
if (mddev == NULL || mddev->pers == NULL) {
bio_io_error(bio);
return BLK_QC_T_NONE;
}
if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
bio_io_error(bio);
@@ -477,10 +503,6 @@ static blk_qc_t md_submit_bio(struct bio *bio)
blk_queue_split(&bio);
if (mddev == NULL || mddev->pers == NULL) {
bio_io_error(bio);
return BLK_QC_T_NONE;
}
if (mddev->ro == 1 && unlikely(rw == WRITE)) {
if (bio_sectors(bio) != 0)
bio->bi_status = BLK_STS_IOERR;
@@ -488,21 +510,27 @@ static blk_qc_t md_submit_bio(struct bio *bio)
return BLK_QC_T_NONE;
}
/*
* save the sectors now since our bio can
* go away inside make_request
*/
sectors = bio_sectors(bio);
if (bio->bi_end_io != md_end_io) {
struct md_io *md_io;
md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO);
md_io->mddev = mddev;
md_io->orig_bi_end_io = bio->bi_end_io;
md_io->orig_bi_private = bio->bi_private;
bio->bi_end_io = md_end_io;
bio->bi_private = md_io;
md_io->start_time = disk_start_io_acct(mddev->gendisk,
bio_sectors(bio),
bio_op(bio));
}
/* bio could be mergeable after passing to underlayer */
bio->bi_opf &= ~REQ_NOMERGE;
md_handle_request(mddev, bio);
part_stat_lock();
part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
part_stat_unlock();
return BLK_QC_T_NONE;
}
@@ -2424,6 +2452,10 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
if (sysfs_create_link(&rdev->kobj, ko, "block"))
/* failure here is OK */;
rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
rdev->sysfs_unack_badblocks =
sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
rdev->sysfs_badblocks =
sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
list_add_rcu(&rdev->same_set, &mddev->disks);
bd_link_disk_holder(rdev->bdev, mddev->gendisk);
@@ -2457,7 +2489,11 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
rdev->mddev = NULL;
sysfs_remove_link(&rdev->kobj, "block");
sysfs_put(rdev->sysfs_state);
sysfs_put(rdev->sysfs_unack_badblocks);
sysfs_put(rdev->sysfs_badblocks);
rdev->sysfs_state = NULL;
rdev->sysfs_unack_badblocks = NULL;
rdev->sysfs_badblocks = NULL;
rdev->badblocks.count = 0;
/* We need to delay this, otherwise we can deadlock when
* writing to 'remove' to "dev/state". We also need
@@ -2802,7 +2838,7 @@ rewrite:
goto repeat;
wake_up(&mddev->sb_wait);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
sysfs_notify_dirent_safe(mddev->sysfs_completed);
rdev_for_each(rdev, mddev) {
if (test_and_clear_bit(FaultRecorded, &rdev->flags))
@@ -4055,7 +4091,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev_resume(mddev);
if (!mddev->thread)
md_update_sb(mddev, 1);
sysfs_notify(&mddev->kobj, NULL, "level");
sysfs_notify_dirent_safe(mddev->sysfs_level);
md_new_event(mddev);
rv = len;
out_unlock:
@@ -4808,7 +4844,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
}
if (err)
return err;
sysfs_notify(&mddev->kobj, NULL, "degraded");
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
} else {
if (cmd_match(page, "check"))
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
@@ -5514,6 +5550,13 @@ static void md_free(struct kobject *ko)
if (mddev->sysfs_state)
sysfs_put(mddev->sysfs_state);
if (mddev->sysfs_completed)
sysfs_put(mddev->sysfs_completed);
if (mddev->sysfs_degraded)
sysfs_put(mddev->sysfs_degraded);
if (mddev->sysfs_level)
sysfs_put(mddev->sysfs_level);
if (mddev->gendisk)
del_gendisk(mddev->gendisk);
@@ -5525,6 +5568,7 @@ static void md_free(struct kobject *ko)
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
mempool_exit(&mddev->md_io_pool);
kfree(mddev);
}
@@ -5620,6 +5664,11 @@ static int md_alloc(dev_t dev, char *name)
*/
mddev->hold_active = UNTIL_STOP;
error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE,
sizeof(struct md_io));
if (error)
goto abort;
error = -ENOMEM;
mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
if (!mddev->queue)
@@ -5676,6 +5725,9 @@ static int md_alloc(dev_t dev, char *name)
if (!error && mddev->kobj.sd) {
kobject_uevent(&mddev->kobj, KOBJ_ADD);
mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
}
mddev_put(mddev);
return error;
@@ -6028,7 +6080,7 @@ static int do_md_run(struct mddev *mddev)
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
sysfs_notify_dirent_safe(mddev->sysfs_state);
sysfs_notify_dirent_safe(mddev->sysfs_action);
sysfs_notify(&mddev->kobj, NULL, "degraded");
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
out:
clear_bit(MD_NOT_READY, &mddev->flags);
return err;
@@ -8742,7 +8794,7 @@ void md_do_sync(struct md_thread *thread)
} else
mddev->curr_resync = 3; /* no longer delayed */
mddev->curr_resync_completed = j;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
sysfs_notify_dirent_safe(mddev->sysfs_completed);
md_new_event(mddev);
update_time = jiffies;
@@ -8770,7 +8822,7 @@ void md_do_sync(struct md_thread *thread)
mddev->recovery_cp = j;
update_time = jiffies;
set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
while (j >= mddev->resync_max &&
@@ -8877,7 +8929,7 @@ void md_do_sync(struct md_thread *thread)
!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
mddev->curr_resync > 3) {
mddev->curr_resync_completed = mddev->curr_resync;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
mddev->pers->sync_request(mddev, max_sectors, &skipped);
@@ -9007,7 +9059,7 @@ static int remove_and_add_spares(struct mddev *mddev,
}
if (removed && mddev->kobj.sd)
sysfs_notify(&mddev->kobj, NULL, "degraded");
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
if (this && removed)
goto no_add;
@@ -9290,8 +9342,7 @@ void md_reap_sync_thread(struct mddev *mddev)
/* success...*/
/* activate any spares */
if (mddev->pers->spare_active(mddev)) {
sysfs_notify(&mddev->kobj, NULL,
"degraded");
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
}
}
@@ -9381,8 +9432,7 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
if (rv == 0) {
/* Make sure they get written out promptly */
if (test_bit(ExternalBbl, &rdev->flags))
sysfs_notify(&rdev->kobj, NULL,
"unacknowledged_bad_blocks");
sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
sysfs_notify_dirent_safe(rdev->sysfs_state);
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
@@ -9403,7 +9453,7 @@ int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
s += rdev->data_offset;
rv = badblocks_clear(&rdev->badblocks, s, sectors);
if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
return rv;
}
EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
@@ -9633,7 +9683,7 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->recovery_offset == MaxSector &&
!test_bit(In_sync, &rdev->flags) &&
mddev->pers->spare_active(mddev))
sysfs_notify(&mddev->kobj, NULL, "degraded");
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
put_page(swapout);
return 0;


@@ -126,7 +126,10 @@ struct md_rdev {
struct kernfs_node *sysfs_state; /* handle for 'state'
* sysfs entry */
/* handle for 'unacknowledged_bad_blocks' sysfs dentry */
struct kernfs_node *sysfs_unack_badblocks;
/* handle for 'bad_blocks' sysfs dentry */
struct kernfs_node *sysfs_badblocks;
struct badblocks badblocks;
struct {
@@ -420,6 +423,9 @@ struct mddev {
* file in sysfs.
*/
struct kernfs_node *sysfs_action; /* handle for 'sync_action' */
struct kernfs_node *sysfs_completed; /*handle for 'sync_completed' */
struct kernfs_node *sysfs_degraded; /*handle for 'degraded' */
struct kernfs_node *sysfs_level; /*handle for 'level' */
struct work_struct del_work; /* used for delayed sysfs removal */
@@ -481,6 +487,7 @@ struct mddev {
struct bio_set sync_set; /* for sync operations like
* metadata and bitmap writes
*/
mempool_t md_io_pool;
/* Generic flush handling.
* The last to finish preflush schedules a worker to submit


@@ -4429,7 +4429,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
sector_nr = conf->reshape_progress;
if (sector_nr) {
mddev->curr_resync_completed = sector_nr;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
sysfs_notify_dirent_safe(mddev->sysfs_completed);
*skipped = 1;
return sector_nr;
}


@@ -2430,10 +2430,15 @@ static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
struct mddev *mddev = log->rdev->mddev;
struct r5conf *conf = mddev->private;
struct stripe_head *sh, *next;
bool cleared_pending = false;
if (ctx->data_only_stripes == 0)
return;
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
cleared_pending = true;
clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
}
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_BACK;
list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
@@ -2448,6 +2453,8 @@ static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
atomic_read(&conf->active_stripes) == 0);
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
if (cleared_pending)
set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
}
static int r5l_recovery_log(struct r5l_log *log)


@@ -5777,7 +5777,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
sector_div(sector_nr, new_data_disks);
if (sector_nr) {
mddev->curr_resync_completed = sector_nr;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
sysfs_notify_dirent_safe(mddev->sysfs_completed);
*skipped = 1;
retn = sector_nr;
goto finish;
@@ -5891,7 +5891,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap);
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
INIT_LIST_HEAD(&stripes);
@@ -5998,7 +5998,7 @@ finish:
conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap);
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
ret:
return retn;


@@ -13,6 +13,7 @@ nvme-core-y := core.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o


@@ -89,7 +89,7 @@ static dev_t nvme_chr_devt;
static struct class *nvme_class;
static struct class *nvme_subsys_class;
static int nvme_revalidate_disk(struct gendisk *disk);
static int _nvme_revalidate_disk(struct gendisk *disk);
static void nvme_put_subsystem(struct nvme_subsystem *subsys);
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
unsigned nsid);
@@ -100,7 +100,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
* Revalidating a dead namespace sets capacity to 0. This will end
* buffered writers dirtying pages that can't be synced.
*/
if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
return;
blk_set_queue_dying(ns->queue);
/* Forcibly unquiesce queues to avoid blocking dispatch */
@@ -287,6 +287,10 @@ void nvme_complete_rq(struct request *req)
nvme_retry_req(req);
return;
}
} else if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
req_op(req) == REQ_OP_ZONE_APPEND) {
req->__sector = nvme_lba_to_sect(req->q->queuedata,
le64_to_cpu(nvme_req(req)->result.u64));
}
nvme_trace_bio_complete(req, status);
@@ -555,7 +559,7 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
goto out_disable_stream;
}
ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
ctrl->nr_streams = min_t(u16, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
return 0;
@@ -673,7 +677,8 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
}
static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd)
struct request *req, struct nvme_command *cmnd,
enum nvme_opcode op)
{
struct nvme_ctrl *ctrl = ns->ctrl;
u16 control = 0;
@@ -687,7 +692,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (req->cmd_flags & REQ_RAHEAD)
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
cmnd->rw.opcode = op;
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
@@ -716,6 +721,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
case NVME_NS_DPS_PI_TYPE2:
control |= NVME_RW_PRINFO_PRCHK_GUARD |
NVME_RW_PRINFO_PRCHK_REF;
if (op == nvme_cmd_zone_append)
control |= NVME_RW_APPEND_PIREMAP;
cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
break;
}
@@ -756,6 +763,19 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
case REQ_OP_FLUSH:
nvme_setup_flush(ns, cmd);
break;
case REQ_OP_ZONE_RESET_ALL:
case REQ_OP_ZONE_RESET:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_RESET);
break;
case REQ_OP_ZONE_OPEN:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_OPEN);
break;
case REQ_OP_ZONE_CLOSE:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_CLOSE);
break;
case REQ_OP_ZONE_FINISH:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_FINISH);
break;
case REQ_OP_WRITE_ZEROES:
ret = nvme_setup_write_zeroes(ns, req, cmd);
break;
@@ -763,8 +783,13 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
ret = nvme_setup_discard(ns, req, cmd);
break;
case REQ_OP_READ:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_read);
break;
case REQ_OP_WRITE:
ret = nvme_setup_rw(ns, req, cmd);
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_write);
break;
case REQ_OP_ZONE_APPEND:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
break;
default:
WARN_ON_ONCE(1);
@@ -1056,8 +1081,13 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
return error;
}
static bool nvme_multi_css(struct nvme_ctrl *ctrl)
{
return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI;
}
static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
struct nvme_ns_id_desc *cur)
struct nvme_ns_id_desc *cur, bool *csi_seen)
{
const char *warn_str = "ctrl returned bogus length:";
void *data = cur;
@@ -1087,6 +1117,15 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
}
uuid_copy(&ids->uuid, data + sizeof(*cur));
return NVME_NIDT_UUID_LEN;
case NVME_NIDT_CSI:
if (cur->nidl != NVME_NIDT_CSI_LEN) {
dev_warn(ctrl->device, "%s %d for NVME_NIDT_CSI\n",
warn_str, cur->nidl);
return -1;
}
memcpy(&ids->csi, data + sizeof(*cur), NVME_NIDT_CSI_LEN);
*csi_seen = true;
return NVME_NIDT_CSI_LEN;
default:
/* Skip unknown types */
return cur->nidl;
@@ -1097,10 +1136,9 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_ns_ids *ids)
{
struct nvme_command c = { };
int status;
bool csi_seen = false;
int status, pos, len;
void *data;
int pos;
int len;
c.identify.opcode = nvme_admin_identify;
c.identify.nsid = cpu_to_le32(nsid);
@@ -1125,7 +1163,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
* device just because of a temporal retry-able error (such
* as path of transport errors).
*/
if (status > 0 && (status & NVME_SC_DNR))
if (status > 0 && (status & NVME_SC_DNR) && !nvme_multi_css(ctrl))
status = 0;
goto free_data;
}
@@ -1136,12 +1174,19 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
if (cur->nidl == 0)
break;
len = nvme_process_ns_desc(ctrl, ids, cur);
len = nvme_process_ns_desc(ctrl, ids, cur, &csi_seen);
if (len < 0)
goto free_data;
break;
len += sizeof(*cur);
}
if (nvme_multi_css(ctrl) && !csi_seen) {
dev_warn(ctrl->device, "Command set not reported for nsid:%d\n",
nsid);
status = -EINVAL;
}
free_data:
kfree(data);
return status;
@@ -1350,8 +1395,8 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u32 effects = 0;
if (ns) {
if (ctrl->effects)
effects = le32_to_cpu(ctrl->effects->iocs[opcode]);
if (ns->head->effects)
effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
dev_warn(ctrl->device,
"IO command:%02x has unhandled effects:%08x\n",
@@ -1378,14 +1423,23 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return effects;
}
static void nvme_update_formats(struct nvme_ctrl *ctrl)
static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
{
struct nvme_ns *ns;
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
if (ns->disk && nvme_revalidate_disk(ns->disk))
if (_nvme_revalidate_disk(ns->disk))
nvme_set_queue_dying(ns);
else if (blk_queue_is_zoned(ns->disk->queue)) {
/*
* IO commands are required to fully revalidate a zoned
* device. Force the command effects to trigger rescan
* work so report zones can run in a context with
* unfrozen IO queues.
*/
*effects |= NVME_CMD_EFFECTS_NCC;
}
up_read(&ctrl->namespaces_rwsem);
}
@@ -1397,7 +1451,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
* this command.
*/
if (effects & NVME_CMD_EFFECTS_LBCC)
nvme_update_formats(ctrl);
nvme_update_formats(ctrl, &effects);
if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
nvme_unfreeze(ctrl);
nvme_mpath_unfreeze(ctrl->subsys);
@@ -1512,7 +1566,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
* Issue ioctl requests on the first available path. Note that unlike normal
* block layer requests we will not retry failed request on another controller.
*/
static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
struct nvme_ns_head **head, int *srcu_idx)
{
#ifdef CONFIG_NVME_MULTIPATH
@@ -1532,7 +1586,7 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
return disk->private_data;
}
static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
{
if (head)
srcu_read_unlock(&head->srcu, idx);
@@ -1798,7 +1852,7 @@ static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
memcpy(ids->eui64, id->eui64, sizeof(id->eui64));
if (ctrl->vs >= NVME_VS(1, 2, 0))
memcpy(ids->nguid, id->nguid, sizeof(id->nguid));
if (ctrl->vs >= NVME_VS(1, 3, 0))
if (ctrl->vs >= NVME_VS(1, 3, 0) || nvme_multi_css(ctrl))
return nvme_identify_ns_descs(ctrl, nsid, ids);
return 0;
}
@@ -1814,7 +1868,8 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
{
return uuid_equal(&a->uuid, &b->uuid) &&
memcmp(&a->nguid, &b->nguid, sizeof(a->nguid)) == 0 &&
memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0;
memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0 &&
a->csi == b->csi;
}
static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
@@ -1924,18 +1979,38 @@ static void nvme_update_disk_info(struct gendisk *disk,
static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
int ret;
u32 iob;
/*
* If identify namespace failed, use default 512 byte block size so
* block layer can use before failing read/write for 0 capacity.
*/
ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
ns->lba_shift = id->lbaf[lbaf].ds;
if (ns->lba_shift == 0)
ns->lba_shift = 9;
switch (ns->head->ids.csi) {
case NVME_CSI_NVM:
break;
case NVME_CSI_ZNS:
ret = nvme_update_zone_info(disk, ns, lbaf);
if (ret) {
dev_warn(ctrl->device,
"failed to add zoned namespace:%u ret:%d\n",
ns->head->ns_id, ret);
return ret;
}
break;
default:
dev_warn(ctrl->device, "unknown csi:%u ns:%u\n",
ns->head->ids.csi, ns->head->ns_id);
return -ENODEV;
}
if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
is_power_of_2(ctrl->max_hw_sectors))
iob = ctrl->max_hw_sectors;
@@ -1943,7 +2018,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
ns->features = 0;
ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
/* the PI implementation requires metadata equal t10 pi tuple size */
if (ns->ms == sizeof(struct t10_pi_tuple))
ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
@@ -1986,7 +2061,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
return 0;
}
static int nvme_revalidate_disk(struct gendisk *disk)
static int _nvme_revalidate_disk(struct gendisk *disk)
{
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -2034,6 +2109,28 @@ out:
return ret;
}
static int nvme_revalidate_disk(struct gendisk *disk)
{
int ret;
ret = _nvme_revalidate_disk(disk);
if (ret)
return ret;
#ifdef CONFIG_BLK_DEV_ZONED
if (blk_queue_is_zoned(disk->queue)) {
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
ret = blk_revalidate_disk_zones(disk, NULL);
if (!ret)
blk_queue_max_zone_append_sectors(disk->queue,
ctrl->max_zone_append);
}
#endif
return ret;
}
static char nvme_pr_type(enum pr_type type)
{
switch (type) {
@@ -2164,6 +2261,7 @@ static const struct block_device_operations nvme_fops = {
.release = nvme_release,
.getgeo = nvme_getgeo,
.revalidate_disk= nvme_revalidate_disk,
.report_zones = nvme_report_zones,
.pr_ops = &nvme_pr_ops,
};
@@ -2190,6 +2288,7 @@ const struct block_device_operations nvme_ns_head_ops = {
.ioctl = nvme_ioctl,
.compat_ioctl = nvme_compat_ioctl,
.getgeo = nvme_getgeo,
.report_zones = nvme_report_zones,
.pr_ops = &nvme_pr_ops,
};
#endif /* CONFIG_NVME_MULTIPATH */
@@ -2271,7 +2370,10 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
ctrl->page_size = 1 << page_shift;
ctrl->ctrl_config = NVME_CC_CSS_NVM;
if (NVME_CAP_CSS(ctrl->cap) & NVME_CAP_CSS_CSI)
ctrl->ctrl_config = NVME_CC_CSS_CSI;
else
ctrl->ctrl_config = NVME_CC_CSS_NVM;
ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
@@ -2819,7 +2921,7 @@ out_unlock:
return ret;
}
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
void *log, size_t size, u64 offset)
{
struct nvme_command c = { };
@@ -2833,27 +2935,55 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
c.get_log_page.lpou = cpu_to_le32(upper_32_bits(offset));
c.get_log_page.csi = csi;
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
}
static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
static struct nvme_cel *nvme_find_cel(struct nvme_ctrl *ctrl, u8 csi)
{
struct nvme_cel *cel, *ret = NULL;
spin_lock(&ctrl->lock);
list_for_each_entry(cel, &ctrl->cels, entry) {
if (cel->csi == csi) {
ret = cel;
break;
}
}
spin_unlock(&ctrl->lock);
return ret;
}
static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
struct nvme_effects_log **log)
{
struct nvme_cel *cel = nvme_find_cel(ctrl, csi);
int ret;
if (!ctrl->effects)
ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL);
if (!ctrl->effects)
return 0;
if (cel)
goto out;
cel = kzalloc(sizeof(*cel), GFP_KERNEL);
if (!cel)
return -ENOMEM;
ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0,
ctrl->effects, sizeof(*ctrl->effects), 0);
ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0, csi,
&cel->log, sizeof(cel->log), 0);
if (ret) {
kfree(ctrl->effects);
ctrl->effects = NULL;
kfree(cel);
return ret;
}
return ret;
cel->csi = csi;
spin_lock(&ctrl->lock);
list_add_tail(&cel->entry, &ctrl->cels);
spin_unlock(&ctrl->lock);
out:
*log = &cel->log;
return 0;
}
/*
@@ -2874,7 +3004,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
return ret;
}
page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
ctrl->sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
ctrl->sqsize = min_t(u16, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
if (ctrl->vs >= NVME_VS(1, 1, 0))
ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap);
@@ -2886,7 +3016,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
}
if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
ret = nvme_get_effects_log(ctrl);
ret = nvme_get_effects_log(ctrl, NVME_CSI_NVM, &ctrl->effects);
if (ret < 0)
goto out_free;
}
@@ -2948,7 +3078,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
if (id->rtd3e) {
/* us -> s */
u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000;
u32 transition_time = le32_to_cpu(id->rtd3e) / USEC_PER_SEC;
ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time,
shutdown_timeout, 60);
@@ -3406,6 +3536,66 @@ static ssize_t nvme_sysfs_show_address(struct device *dev,
}
static DEVICE_ATTR(address, S_IRUGO, nvme_sysfs_show_address, NULL);
static ssize_t nvme_ctrl_loss_tmo_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
struct nvmf_ctrl_options *opts = ctrl->opts;
if (ctrl->opts->max_reconnects == -1)
return sprintf(buf, "off\n");
return sprintf(buf, "%d\n",
opts->max_reconnects * opts->reconnect_delay);
}
static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
struct nvmf_ctrl_options *opts = ctrl->opts;
int ctrl_loss_tmo, err;
err = kstrtoint(buf, 10, &ctrl_loss_tmo);
if (err)
return -EINVAL;
else if (ctrl_loss_tmo < 0)
opts->max_reconnects = -1;
else
opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
opts->reconnect_delay);
return count;
}
static DEVICE_ATTR(ctrl_loss_tmo, S_IRUGO | S_IWUSR,
nvme_ctrl_loss_tmo_show, nvme_ctrl_loss_tmo_store);
static ssize_t nvme_ctrl_reconnect_delay_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
if (ctrl->opts->reconnect_delay == -1)
return sprintf(buf, "off\n");
return sprintf(buf, "%d\n", ctrl->opts->reconnect_delay);
}
static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
unsigned int v;
int err;
err = kstrtou32(buf, 10, &v);
if (err || v > UINT_MAX)
return -EINVAL;
ctrl->opts->reconnect_delay = v;
return count;
}
static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR,
nvme_ctrl_reconnect_delay_show, nvme_ctrl_reconnect_delay_store);
static struct attribute *nvme_dev_attrs[] = {
&dev_attr_reset_controller.attr,
&dev_attr_rescan_controller.attr,
@@ -3423,6 +3613,8 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_sqsize.attr,
&dev_attr_hostnqn.attr,
&dev_attr_hostid.attr,
&dev_attr_ctrl_loss_tmo.attr,
&dev_attr_reconnect_delay.attr,
NULL
};
@@ -3519,6 +3711,13 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
goto out_cleanup_srcu;
}
if (head->ids.csi) {
ret = nvme_get_effects_log(ctrl, head->ids.csi, &head->effects);
if (ret)
goto out_cleanup_srcu;
} else
head->effects = ctrl->effects;
ret = nvme_mpath_alloc_disk(ctrl, head);
if (ret)
goto out_cleanup_srcu;
@@ -3735,7 +3934,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
nvme_mpath_clear_current_path(ns);
synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
if (ns->disk->flags & GENHD_FL_UP) {
del_gendisk(ns->disk);
blk_cleanup_queue(ns->queue);
if (blk_get_integrity(ns->disk))
@@ -3766,7 +3965,7 @@ static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ns = nvme_find_get_ns(ctrl, nsid);
if (ns) {
if (ns->disk && revalidate_disk(ns->disk))
if (revalidate_disk(ns->disk))
nvme_ns_remove(ns);
nvme_put_ns(ns);
} else
@@ -3859,8 +4058,8 @@ static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
* raced with us in reading the log page, which could cause us to miss
* updates.
*/
error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0, log,
log_size, 0);
error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0,
NVME_CSI_NVM, log, log_size, 0);
if (error)
dev_warn(ctrl->device,
"reading changed ns log failed: %d\n", error);
@@ -4004,8 +4203,8 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
if (!log)
return;
if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log,
sizeof(*log), 0))
if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, NVME_CSI_NVM,
log, sizeof(*log), 0))
dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
kfree(log);
}
@@ -4142,11 +4341,16 @@ static void nvme_free_ctrl(struct device *dev)
struct nvme_ctrl *ctrl =
container_of(dev, struct nvme_ctrl, ctrl_device);
struct nvme_subsystem *subsys = ctrl->subsys;
struct nvme_cel *cel, *next;
if (subsys && ctrl->instance != subsys->instance)
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
kfree(ctrl->effects);
list_for_each_entry_safe(cel, next, &ctrl->cels, entry) {
list_del(&cel->entry);
kfree(cel);
}
nvme_mpath_uninit(ctrl);
__free_page(ctrl->discard_page);
@@ -4177,6 +4381,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
spin_lock_init(&ctrl->lock); spin_lock_init(&ctrl->lock);
mutex_init(&ctrl->scan_lock); mutex_init(&ctrl->scan_lock);
INIT_LIST_HEAD(&ctrl->namespaces); INIT_LIST_HEAD(&ctrl->namespaces);
INIT_LIST_HEAD(&ctrl->cels);
init_rwsem(&ctrl->namespaces_rwsem); init_rwsem(&ctrl->namespaces_rwsem);
ctrl->dev = dev; ctrl->dev = dev;
ctrl->ops = ops; ctrl->ops = ops;
@@ -4373,6 +4578,8 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
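
For illustration only (not part of the series): the core changes above thread a Command Set Identifier through every nvme_get_log() call. A rough sketch of where that argument ends up, using the field names added to struct nvme_get_log_page_command later in this diff; the real helper also splits size and offset into numdl/numdu and lpol/lpou, which is omitted here.

#include <linux/types.h>
#include <linux/nvme.h>

/* Illustrative only: fill the identification part of a Get Log Page SQE. */
static void sketch_fill_get_log(struct nvme_command *c, u32 nsid,
				u8 log_page, u8 lsp, u8 csi)
{
	c->get_log_page.opcode = nvme_admin_get_log_page;
	c->get_log_page.nsid = cpu_to_le32(nsid);
	c->get_log_page.lid = log_page;
	c->get_log_page.lsp = lsp;
	c->get_log_page.csi = csi;	/* e.g. NVME_CSI_NVM or NVME_CSI_ZNS */
}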


@@ -62,7 +62,7 @@ static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
int ret; int ret;
ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0, ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
&data->log, sizeof(data->log), 0); NVME_CSI_NVM, &data->log, sizeof(data->log), 0);
return ret <= 0 ? ret : -EIO; return ret <= 0 ? ret : -EIO;
} }


@@ -593,8 +593,8 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
dev_meta_off = dev_meta; dev_meta_off = dev_meta;
ret = nvme_get_log(ctrl, ns->head->ns_id, ret = nvme_get_log(ctrl, ns->head->ns_id,
NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len, NVME_NVM_LOG_REPORT_CHUNK, 0, NVME_CSI_NVM,
offset); dev_meta, len, offset);
if (ret) { if (ret) {
dev_err(ctrl->device, "Get REPORT CHUNK log error\n"); dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
break; break;


@@ -527,7 +527,7 @@ static int nvme_read_ana_log(struct nvme_ctrl *ctrl)
int error; int error;
mutex_lock(&ctrl->ana_lock); mutex_lock(&ctrl->ana_lock);
error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, 0, error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, 0, NVME_CSI_NVM,
ctrl->ana_log_buf, ctrl->ana_log_size, 0); ctrl->ana_log_buf, ctrl->ana_log_size, 0);
if (error) { if (error) {
dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error); dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);


@@ -191,6 +191,12 @@ struct nvme_fault_inject {
#endif #endif
}; };
struct nvme_cel {
struct list_head entry;
struct nvme_effects_log log;
u8 csi;
};
struct nvme_ctrl { struct nvme_ctrl {
bool comp_seen; bool comp_seen;
enum nvme_ctrl_state state; enum nvme_ctrl_state state;
@@ -232,6 +238,9 @@ struct nvme_ctrl {
u32 max_hw_sectors; u32 max_hw_sectors;
u32 max_segments; u32 max_segments;
u32 max_integrity_segments; u32 max_integrity_segments;
#ifdef CONFIG_BLK_DEV_ZONED
u32 max_zone_append;
#endif
u16 crdt[3]; u16 crdt[3];
u16 oncs; u16 oncs;
u16 oacs; u16 oacs;
@@ -257,6 +266,7 @@ struct nvme_ctrl {
unsigned long quirks; unsigned long quirks;
struct nvme_id_power_state psd[32]; struct nvme_id_power_state psd[32];
struct nvme_effects_log *effects; struct nvme_effects_log *effects;
struct list_head cels;
struct work_struct scan_work; struct work_struct scan_work;
struct work_struct async_event_work; struct work_struct async_event_work;
struct delayed_work ka_work; struct delayed_work ka_work;
@@ -339,6 +349,7 @@ struct nvme_ns_ids {
u8 eui64[8]; u8 eui64[8];
u8 nguid[16]; u8 nguid[16];
uuid_t uuid; uuid_t uuid;
u8 csi;
}; };
/* /*
@@ -358,6 +369,7 @@ struct nvme_ns_head {
struct kref ref; struct kref ref;
bool shared; bool shared;
int instance; int instance;
struct nvme_effects_log *effects;
#ifdef CONFIG_NVME_MULTIPATH #ifdef CONFIG_NVME_MULTIPATH
struct gendisk *disk; struct gendisk *disk;
struct bio_list requeue_list; struct bio_list requeue_list;
@@ -395,6 +407,9 @@ struct nvme_ns {
u16 sgs; u16 sgs;
u32 sws; u32 sws;
u8 pi_type; u8 pi_type;
#ifdef CONFIG_BLK_DEV_ZONED
u64 zsze;
#endif
unsigned long features; unsigned long features;
unsigned long flags; unsigned long flags;
#define NVME_NS_REMOVING 0 #define NVME_NS_REMOVING 0
@@ -560,8 +575,11 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
void *log, size_t size, u64 offset); void *log, size_t size, u64 offset);
struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
struct nvme_ns_head **head, int *srcu_idx);
void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx);
extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct block_device_operations nvme_ns_head_ops; extern const struct block_device_operations nvme_ns_head_ops;
@@ -697,6 +715,36 @@ static inline void nvme_mpath_update_disk_size(struct gendisk *disk)
} }
#endif /* CONFIG_NVME_MULTIPATH */ #endif /* CONFIG_NVME_MULTIPATH */
#ifdef CONFIG_BLK_DEV_ZONED
int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
unsigned lbaf);
int nvme_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd,
enum nvme_zone_mgmt_action action);
#else
#define nvme_report_zones NULL
static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd,
enum nvme_zone_mgmt_action action)
{
return BLK_STS_NOTSUPP;
}
static inline int nvme_update_zone_info(struct gendisk *disk,
struct nvme_ns *ns,
unsigned lbaf)
{
dev_warn(ns->ctrl->device,
"Please enable CONFIG_BLK_DEV_ZONED to support ZNS devices\n");
return -EPROTONOSUPPORT;
}
#endif
#ifdef CONFIG_NVM #ifdef CONFIG_NVM
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns); void nvme_nvm_unregister(struct nvme_ns *ns);


@@ -61,10 +61,10 @@ MODULE_PARM_DESC(sgl_threshold,
static int io_queue_depth_set(const char *val, const struct kernel_param *kp); static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
static const struct kernel_param_ops io_queue_depth_ops = { static const struct kernel_param_ops io_queue_depth_ops = {
.set = io_queue_depth_set, .set = io_queue_depth_set,
.get = param_get_int, .get = param_get_uint,
}; };
static int io_queue_depth = 1024; static unsigned int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
@@ -115,7 +115,7 @@ struct nvme_dev {
unsigned max_qid; unsigned max_qid;
unsigned io_queues[HCTX_MAX_TYPES]; unsigned io_queues[HCTX_MAX_TYPES];
unsigned int num_vecs; unsigned int num_vecs;
int q_depth; u16 q_depth;
int io_sqes; int io_sqes;
u32 db_stride; u32 db_stride;
void __iomem *bar; void __iomem *bar;
@@ -151,13 +151,14 @@ struct nvme_dev {
static int io_queue_depth_set(const char *val, const struct kernel_param *kp) static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
{ {
int n = 0, ret; int ret;
u16 n;
ret = kstrtoint(val, 10, &n); ret = kstrtou16(val, 10, &n);
if (ret != 0 || n < 2) if (ret != 0 || n < 2)
return -EINVAL; return -EINVAL;
return param_set_int(val, kp); return param_set_ushort(val, kp);
} }
static inline unsigned int sq_idx(unsigned int qid, u32 stride) static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -361,7 +362,7 @@ static int nvme_pci_npages_sgl(unsigned int num_seg)
return DIV_ROUND_UP(num_seg * sizeof(struct nvme_sgl_desc), PAGE_SIZE); return DIV_ROUND_UP(num_seg * sizeof(struct nvme_sgl_desc), PAGE_SIZE);
} }
static unsigned int nvme_pci_iod_alloc_size(struct nvme_dev *dev, static size_t nvme_pci_iod_alloc_size(struct nvme_dev *dev,
unsigned int size, unsigned int nseg, bool use_sgl) unsigned int size, unsigned int nseg, bool use_sgl)
{ {
size_t alloc_size; size_t alloc_size;
@@ -500,9 +501,6 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
int nseg = blk_rq_nr_phys_segments(req); int nseg = blk_rq_nr_phys_segments(req);
unsigned int avg_seg_size; unsigned int avg_seg_size;
if (nseg == 0)
return false;
avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg); avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg);
if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1)))) if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
@@ -764,7 +762,7 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma);
if (bv->bv_len > first_prp_len) if (bv->bv_len > first_prp_len)
cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len);
return 0; return BLK_STS_OK;
} }
static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev, static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
@@ -782,7 +780,7 @@ static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma); cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma);
cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len); cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len);
cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4; cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4;
return 0; return BLK_STS_OK;
} }
static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
@@ -846,7 +844,7 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
if (dma_mapping_error(dev->dev, iod->meta_dma)) if (dma_mapping_error(dev->dev, iod->meta_dma))
return BLK_STS_IOERR; return BLK_STS_IOERR;
cmnd->rw.metadata = cpu_to_le64(iod->meta_dma); cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
return 0; return BLK_STS_OK;
} }
/* /*
@@ -1019,6 +1017,7 @@ static irqreturn_t nvme_irq(int irq, void *data)
static irqreturn_t nvme_irq_check(int irq, void *data) static irqreturn_t nvme_irq_check(int irq, void *data)
{ {
struct nvme_queue *nvmeq = data; struct nvme_queue *nvmeq = data;
if (nvme_cqe_pending(nvmeq)) if (nvme_cqe_pending(nvmeq))
return IRQ_WAKE_THREAD; return IRQ_WAKE_THREAD;
return IRQ_NONE; return IRQ_NONE;
@@ -1154,7 +1153,6 @@ static void abort_endio(struct request *req, blk_status_t error)
static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
{ {
/* If true, indicates loss of adapter communication, possibly by a /* If true, indicates loss of adapter communication, possibly by a
* NVMe Subsystem reset. * NVMe Subsystem reset.
*/ */
@@ -1261,9 +1259,9 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
} }
/* /*
* Shutdown the controller immediately and schedule a reset if the * Shutdown the controller immediately and schedule a reset if the
* command was already aborted once before and still hasn't been * command was already aborted once before and still hasn't been
* returned to the driver, or if this is the admin queue. * returned to the driver, or if this is the admin queue.
*/ */
if (!nvmeq->qid || iod->aborted) { if (!nvmeq->qid || iod->aborted) {
dev_warn(dev->ctrl.device, dev_warn(dev->ctrl.device,
@@ -1402,6 +1400,7 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
if (q_size_aligned * nr_io_queues > dev->cmb_size) { if (q_size_aligned * nr_io_queues > dev->cmb_size) {
u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues); u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);
mem_per_q = round_down(mem_per_q, dev->ctrl.page_size); mem_per_q = round_down(mem_per_q, dev->ctrl.page_size);
q_depth = div_u64(mem_per_q, entry_size); q_depth = div_u64(mem_per_q, entry_size);
@@ -1932,12 +1931,12 @@ out:
static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{ {
u32 chunk_size; u64 min_chunk = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES);
u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
u64 chunk_size;
/* start big and work our way down */ /* start big and work our way down */
for (chunk_size = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES); for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) {
chunk_size >= max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
chunk_size /= 2) {
if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) { if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
if (!min || dev->host_mem_size >= min) if (!min || dev->host_mem_size >= min)
return 0; return 0;
@@ -2003,7 +2002,7 @@ static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues; unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues;
/* /*
* If there is no interupt available for queues, ensure that * If there is no interrupt available for queues, ensure that
* the default queue is set to 1. The affinity set size is * the default queue is set to 1. The affinity set size is
* also set to one, but the irq core ignores it for this case. * also set to one, but the irq core ignores it for this case.
* *
@@ -2261,8 +2260,8 @@ static void nvme_dev_add(struct nvme_dev *dev)
dev->tagset.nr_maps++; dev->tagset.nr_maps++;
dev->tagset.timeout = NVME_IO_TIMEOUT; dev->tagset.timeout = NVME_IO_TIMEOUT;
dev->tagset.numa_node = dev->ctrl.numa_node; dev->tagset.numa_node = dev->ctrl.numa_node;
dev->tagset.queue_depth = dev->tagset.queue_depth = min_t(unsigned int, dev->q_depth,
min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; BLK_MQ_MAX_DEPTH) - 1;
dev->tagset.cmd_size = sizeof(struct nvme_iod); dev->tagset.cmd_size = sizeof(struct nvme_iod);
dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
dev->tagset.driver_data = dev; dev->tagset.driver_data = dev;
@@ -2321,7 +2320,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP); dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1, dev->q_depth = min_t(u16, NVME_CAP_MQES(dev->ctrl.cap) + 1,
io_queue_depth); io_queue_depth);
dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */ dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */
dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap); dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
@@ -2876,6 +2875,7 @@ static void nvme_reset_done(struct pci_dev *pdev)
static void nvme_shutdown(struct pci_dev *pdev) static void nvme_shutdown(struct pci_dev *pdev)
{ {
struct nvme_dev *dev = pci_get_drvdata(pdev); struct nvme_dev *dev = pci_get_drvdata(pdev);
nvme_disable_prepare_reset(dev, true); nvme_disable_prepare_reset(dev, true);
} }
@@ -3006,6 +3006,7 @@ unfreeze:
static int nvme_simple_suspend(struct device *dev) static int nvme_simple_suspend(struct device *dev)
{ {
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
return nvme_disable_prepare_reset(ndev, true); return nvme_disable_prepare_reset(ndev, true);
} }
@@ -3079,16 +3080,16 @@ static const struct pci_error_handlers nvme_err_handler = {
}; };
static const struct pci_device_id nvme_id_table[] = { static const struct pci_device_id nvme_id_table[] = {
{ PCI_VDEVICE(INTEL, 0x0953), { PCI_VDEVICE(INTEL, 0x0953), /* Intel 750/P3500/P3600/P3700 */
.driver_data = NVME_QUIRK_STRIPE_SIZE | .driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DEALLOCATE_ZEROES, }, NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0x0a53), { PCI_VDEVICE(INTEL, 0x0a53), /* Intel P3520 */
.driver_data = NVME_QUIRK_STRIPE_SIZE | .driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DEALLOCATE_ZEROES, }, NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0x0a54), { PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */
.driver_data = NVME_QUIRK_STRIPE_SIZE | .driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DEALLOCATE_ZEROES, }, NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0x0a55), { PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */
.driver_data = NVME_QUIRK_STRIPE_SIZE | .driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DEALLOCATE_ZEROES, }, NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
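
The host memory buffer rewrite above is an instance of a common allocation pattern: start with the largest chunk the device prefers and halve on failure until a floor is reached. A generic, hedged sketch of that loop in isolation (helper names invented for illustration, not the driver's):

#include <linux/types.h>
#include <linux/errno.h>

/*
 * Halve the chunk size until an allocation succeeds or we drop below the
 * smallest size the device will accept; mirrors the rewritten
 * nvme_alloc_host_mem() loop in shape only.
 */
static int sketch_alloc_descending(u64 preferred, u64 floor,
				   int (*try_alloc)(u64 chunk, void *ctx),
				   void *ctx)
{
	u64 chunk;

	for (chunk = preferred; chunk >= floor; chunk /= 2)
		if (!try_alloc(chunk, ctx))
			return 0;
	return -ENOMEM;
}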


@@ -46,6 +46,7 @@ struct nvme_tcp_request {
u32 pdu_sent; u32 pdu_sent;
u16 ttag; u16 ttag;
struct list_head entry; struct list_head entry;
struct llist_node lentry;
__le32 ddgst; __le32 ddgst;
struct bio *curr_bio; struct bio *curr_bio;
@@ -75,9 +76,10 @@ struct nvme_tcp_queue {
struct work_struct io_work; struct work_struct io_work;
int io_cpu; int io_cpu;
spinlock_t lock;
struct mutex send_mutex; struct mutex send_mutex;
struct llist_head req_list;
struct list_head send_list; struct list_head send_list;
bool more_requests;
/* recv state */ /* recv state */
void *pdu; void *pdu;
@@ -261,15 +263,13 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
} }
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
bool sync) bool sync, bool last)
{ {
struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_queue *queue = req->queue;
bool empty; bool empty;
spin_lock(&queue->lock); empty = llist_add(&req->lentry, &queue->req_list) &&
empty = list_empty(&queue->send_list) && !queue->request; list_empty(&queue->send_list) && !queue->request;
list_add_tail(&req->entry, &queue->send_list);
spin_unlock(&queue->lock);
/* /*
* if we're the first on the send_list and we can try to send * if we're the first on the send_list and we can try to send
@@ -278,25 +278,42 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
*/ */
if (queue->io_cpu == smp_processor_id() && if (queue->io_cpu == smp_processor_id() &&
sync && empty && mutex_trylock(&queue->send_mutex)) { sync && empty && mutex_trylock(&queue->send_mutex)) {
queue->more_requests = !last;
nvme_tcp_try_send(queue); nvme_tcp_try_send(queue);
queue->more_requests = false;
mutex_unlock(&queue->send_mutex); mutex_unlock(&queue->send_mutex);
} else { } else if (last) {
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
} }
} }
static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
{
struct nvme_tcp_request *req;
struct llist_node *node;
for (node = llist_del_all(&queue->req_list); node; node = node->next) {
req = llist_entry(node, struct nvme_tcp_request, lentry);
list_add(&req->entry, &queue->send_list);
}
}
static inline struct nvme_tcp_request * static inline struct nvme_tcp_request *
nvme_tcp_fetch_request(struct nvme_tcp_queue *queue) nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
{ {
struct nvme_tcp_request *req; struct nvme_tcp_request *req;
spin_lock(&queue->lock);
req = list_first_entry_or_null(&queue->send_list, req = list_first_entry_or_null(&queue->send_list,
struct nvme_tcp_request, entry); struct nvme_tcp_request, entry);
if (req) if (!req) {
list_del(&req->entry); nvme_tcp_process_req_list(queue);
spin_unlock(&queue->lock); req = list_first_entry_or_null(&queue->send_list,
struct nvme_tcp_request, entry);
if (unlikely(!req))
return NULL;
}
list_del(&req->entry);
return req; return req;
} }
@@ -596,7 +613,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
req->state = NVME_TCP_SEND_H2C_PDU; req->state = NVME_TCP_SEND_H2C_PDU;
req->offset = 0; req->offset = 0;
nvme_tcp_queue_request(req, false); nvme_tcp_queue_request(req, false, true);
return 0; return 0;
} }
@@ -863,6 +880,12 @@ done:
read_unlock(&sk->sk_callback_lock); read_unlock(&sk->sk_callback_lock);
} }
static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
{
return !list_empty(&queue->send_list) ||
!llist_empty(&queue->req_list) || queue->more_requests;
}
static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
{ {
queue->request = NULL; queue->request = NULL;
@@ -884,7 +907,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
bool last = nvme_tcp_pdu_last_send(req, len); bool last = nvme_tcp_pdu_last_send(req, len);
int ret, flags = MSG_DONTWAIT; int ret, flags = MSG_DONTWAIT;
if (last && !queue->data_digest) if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
flags |= MSG_EOR; flags |= MSG_EOR;
else else
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
@@ -931,7 +954,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
int flags = MSG_DONTWAIT; int flags = MSG_DONTWAIT;
int ret; int ret;
if (inline_data) if (inline_data || nvme_tcp_queue_more(queue))
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else else
flags |= MSG_EOR; flags |= MSG_EOR;
@@ -996,12 +1019,17 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{ {
struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_queue *queue = req->queue;
int ret; int ret;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = { struct kvec iov = {
.iov_base = &req->ddgst + req->offset, .iov_base = &req->ddgst + req->offset,
.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
}; };
if (nvme_tcp_queue_more(queue))
msg.msg_flags |= MSG_MORE;
else
msg.msg_flags |= MSG_EOR;
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0)) if (unlikely(ret <= 0))
return ret; return ret;
@@ -1344,8 +1372,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
int ret, rcv_pdu_size; int ret, rcv_pdu_size;
queue->ctrl = ctrl; queue->ctrl = ctrl;
init_llist_head(&queue->req_list);
INIT_LIST_HEAD(&queue->send_list); INIT_LIST_HEAD(&queue->send_list);
spin_lock_init(&queue->lock);
mutex_init(&queue->send_mutex); mutex_init(&queue->send_mutex);
INIT_WORK(&queue->io_work, nvme_tcp_io_work); INIT_WORK(&queue->io_work, nvme_tcp_io_work);
queue->queue_size = queue_size; queue->queue_size = queue_size;
@@ -2106,7 +2134,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
ctrl->async_req.curr_bio = NULL; ctrl->async_req.curr_bio = NULL;
ctrl->async_req.data_len = 0; ctrl->async_req.data_len = 0;
nvme_tcp_queue_request(&ctrl->async_req, true); nvme_tcp_queue_request(&ctrl->async_req, true, true);
} }
static enum blk_eh_timer_return static enum blk_eh_timer_return
@@ -2218,6 +2246,14 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
return 0; return 0;
} }
static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
struct nvme_tcp_queue *queue = hctx->driver_data;
if (!llist_empty(&queue->req_list))
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}
static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd) const struct blk_mq_queue_data *bd)
{ {
@@ -2237,7 +2273,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq); blk_mq_start_request(rq);
nvme_tcp_queue_request(req, true); nvme_tcp_queue_request(req, true, bd->last);
return BLK_STS_OK; return BLK_STS_OK;
} }
@@ -2305,6 +2341,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
static const struct blk_mq_ops nvme_tcp_mq_ops = { static const struct blk_mq_ops nvme_tcp_mq_ops = {
.queue_rq = nvme_tcp_queue_rq, .queue_rq = nvme_tcp_queue_rq,
.commit_rqs = nvme_tcp_commit_rqs,
.complete = nvme_complete_rq, .complete = nvme_complete_rq,
.init_request = nvme_tcp_init_request, .init_request = nvme_tcp_init_request,
.exit_request = nvme_tcp_exit_request, .exit_request = nvme_tcp_exit_request,
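
The TCP changes above replace the spinlocked send_list front end with a lock-free llist. A minimal sketch of that producer/consumer split, using only the llist primitives the patch relies on (struct and function names here are illustrative, not the driver's):

#include <linux/llist.h>
#include <linux/list.h>

struct sketch_req {
	struct list_head entry;		/* consumer-side ordering */
	struct llist_node lentry;	/* producer-side lock-free push */
};

struct sketch_queue {
	struct llist_head req_list;	/* multi-producer, lock-free */
	struct list_head send_list;	/* touched only by the io_work consumer */
};

/* Producer side: returns true if the llist was empty before the add. */
static bool sketch_queue_req(struct sketch_queue *q, struct sketch_req *req)
{
	return llist_add(&req->lentry, &q->req_list);
}

/* Consumer side: drain the llist (newest first) and rebuild FIFO order. */
static void sketch_drain(struct sketch_queue *q)
{
	struct llist_node *node;
	struct sketch_req *req;

	/* q->send_list is assumed initialized with INIT_LIST_HEAD(). */
	for (node = llist_del_all(&q->req_list); node; node = node->next) {
		req = llist_entry(node, struct sketch_req, lentry);
		/* list_add() prepends, so iterating newest-first yields FIFO. */
		list_add(&req->entry, &q->send_list);
	}
}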

drivers/nvme/host/zns.c (new file, 256 lines)

@@ -0,0 +1,256 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
*/
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include "nvme.h"
static int nvme_set_max_append(struct nvme_ctrl *ctrl)
{
struct nvme_command c = { };
struct nvme_id_ctrl_zns *id;
int status;
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id)
return -ENOMEM;
c.identify.opcode = nvme_admin_identify;
c.identify.cns = NVME_ID_CNS_CS_CTRL;
c.identify.csi = NVME_CSI_ZNS;
status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
if (status) {
kfree(id);
return status;
}
if (id->zasl)
ctrl->max_zone_append = 1 << (id->zasl + 3);
else
ctrl->max_zone_append = ctrl->max_hw_sectors;
kfree(id);
return 0;
}
int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
unsigned lbaf)
{
struct nvme_effects_log *log = ns->head->effects;
struct request_queue *q = disk->queue;
struct nvme_command c = { };
struct nvme_id_ns_zns *id;
int status;
/* Driver requires zone append support */
if (!(le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
NVME_CMD_EFFECTS_CSUPP)) {
dev_warn(ns->ctrl->device,
"append not supported for zoned namespace:%d\n",
ns->head->ns_id);
return -EINVAL;
}
/* Lazily query controller append limit for the first zoned namespace */
if (!ns->ctrl->max_zone_append) {
status = nvme_set_max_append(ns->ctrl);
if (status)
return status;
}
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id)
return -ENOMEM;
c.identify.opcode = nvme_admin_identify;
c.identify.nsid = cpu_to_le32(ns->head->ns_id);
c.identify.cns = NVME_ID_CNS_CS_NS;
c.identify.csi = NVME_CSI_ZNS;
status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
if (status)
goto free_data;
/*
* We currently do not handle devices requiring any of the zoned
* operation characteristics.
*/
if (id->zoc) {
dev_warn(ns->ctrl->device,
"zone operations:%x not supported for namespace:%u\n",
le16_to_cpu(id->zoc), ns->head->ns_id);
status = -EINVAL;
goto free_data;
}
ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
if (!is_power_of_2(ns->zsze)) {
dev_warn(ns->ctrl->device,
"invalid zone size:%llu for namespace:%u\n",
ns->zsze, ns->head->ns_id);
status = -EINVAL;
goto free_data;
}
q->limits.zoned = BLK_ZONED_HM;
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
blk_queue_max_active_zones(q, le32_to_cpu(id->mar) + 1);
free_data:
kfree(id);
return status;
}
static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
unsigned int nr_zones, size_t *buflen)
{
struct request_queue *q = ns->disk->queue;
size_t bufsize;
void *buf;
const size_t min_bufsize = sizeof(struct nvme_zone_report) +
sizeof(struct nvme_zone_descriptor);
nr_zones = min_t(unsigned int, nr_zones,
get_capacity(ns->disk) >> ilog2(ns->zsze));
bufsize = sizeof(struct nvme_zone_report) +
nr_zones * sizeof(struct nvme_zone_descriptor);
bufsize = min_t(size_t, bufsize,
queue_max_hw_sectors(q) << SECTOR_SHIFT);
bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
while (bufsize >= min_bufsize) {
buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
if (buf) {
*buflen = bufsize;
return buf;
}
bufsize >>= 1;
}
return NULL;
}
static int __nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
struct nvme_zone_report *report,
size_t buflen)
{
struct nvme_command c = { };
int ret;
c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
c.zmr.zra = NVME_ZRA_ZONE_REPORT;
c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
if (ret)
return ret;
return le64_to_cpu(report->nr_zones);
}
static int nvme_zone_parse_entry(struct nvme_ns *ns,
struct nvme_zone_descriptor *entry,
unsigned int idx, report_zones_cb cb,
void *data)
{
struct blk_zone zone = { };
if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
dev_err(ns->ctrl->device, "invalid zone type %#x\n",
entry->zt);
return -EINVAL;
}
zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
zone.cond = entry->zs >> 4;
zone.len = ns->zsze;
zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
return cb(&zone, idx, data);
}
static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct nvme_zone_report *report;
int ret, zone_idx = 0;
unsigned int nz, i;
size_t buflen;
report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
if (!report)
return -ENOMEM;
sector &= ~(ns->zsze - 1);
while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
memset(report, 0, buflen);
ret = __nvme_ns_report_zones(ns, sector, report, buflen);
if (ret < 0)
goto out_free;
nz = min_t(unsigned int, ret, nr_zones);
if (!nz)
break;
for (i = 0; i < nz && zone_idx < nr_zones; i++) {
ret = nvme_zone_parse_entry(ns, &report->entries[i],
zone_idx, cb, data);
if (ret)
goto out_free;
zone_idx++;
}
sector += ns->zsze * nz;
}
if (zone_idx > 0)
ret = zone_idx;
else
ret = -EINVAL;
out_free:
kvfree(report);
return ret;
}
int nvme_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct nvme_ns_head *head = NULL;
struct nvme_ns *ns;
int srcu_idx, ret;
ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
if (unlikely(!ns))
return -EWOULDBLOCK;
if (ns->head->ids.csi == NVME_CSI_ZNS)
ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data);
else
ret = -EINVAL;
nvme_put_ns_from_disk(head, srcu_idx);
return ret;
}
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
struct nvme_command *c, enum nvme_zone_mgmt_action action)
{
c->zms.opcode = nvme_cmd_zone_mgmt_send;
c->zms.nsid = cpu_to_le32(ns->head->ns_id);
c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
c->zms.zsa = action;
if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
c->zms.select_all = 1;
return BLK_STS_OK;
}
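
nvme_report_zones() above is intended to back the driver's ->report_zones operation, so in-kernel users would reach it through blkdev_report_zones(). A hedged example of such a caller (callback and helper names invented for illustration):

#include <linux/blkdev.h>
#include <linux/blkzoned.h>

/* Count implicitly or explicitly open zones; return 0 to keep iterating. */
static int count_open_zones_cb(struct blk_zone *zone, unsigned int idx,
			       void *data)
{
	unsigned int *nr_open = data;

	if (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
	    zone->cond == BLK_ZONE_COND_EXP_OPEN)
		(*nr_open)++;
	return 0;
}

static int count_open_zones(struct block_device *bdev, unsigned int nr_zones)
{
	unsigned int nr_open = 0;
	int ret;

	/* Returns the number of zones reported, or a negative errno. */
	ret = blkdev_report_zones(bdev, 0, nr_zones, count_open_zones_cb,
				  &nr_open);
	return ret < 0 ? ret : nr_open;
}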


@@ -427,7 +427,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->awupf = 0; id->awupf = 0;
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->has_keyed_sgls) if (ctrl->ops->flags & NVMF_KEYED_SGLS)
id->sgls |= cpu_to_le32(1 << 2); id->sgls |= cpu_to_le32(1 << 2);
if (req->port->inline_data_size) if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20); id->sgls |= cpu_to_le32(1 << 20);


@@ -862,14 +862,14 @@ static ssize_t nvmet_subsys_attr_version_show(struct config_item *item,
struct nvmet_subsys *subsys = to_subsys(item); struct nvmet_subsys *subsys = to_subsys(item);
if (NVME_TERTIARY(subsys->ver)) if (NVME_TERTIARY(subsys->ver))
return snprintf(page, PAGE_SIZE, "%d.%d.%d\n", return snprintf(page, PAGE_SIZE, "%llu.%llu.%llu\n",
(int)NVME_MAJOR(subsys->ver), NVME_MAJOR(subsys->ver),
(int)NVME_MINOR(subsys->ver), NVME_MINOR(subsys->ver),
(int)NVME_TERTIARY(subsys->ver)); NVME_TERTIARY(subsys->ver));
return snprintf(page, PAGE_SIZE, "%d.%d\n", return snprintf(page, PAGE_SIZE, "%llu.%llu\n",
(int)NVME_MAJOR(subsys->ver), NVME_MAJOR(subsys->ver),
(int)NVME_MINOR(subsys->ver)); NVME_MINOR(subsys->ver));
} }
static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,


@@ -336,7 +336,7 @@ int nvmet_enable_port(struct nvmet_port *port)
* If the user requested PI support and the transport isn't pi capable, * If the user requested PI support and the transport isn't pi capable,
* don't enable the port. * don't enable the port.
*/ */
if (port->pi_enable && !ops->metadata_support) { if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) {
pr_err("T10-PI is not supported by transport type %d\n", pr_err("T10-PI is not supported by transport type %d\n",
port->disc_addr.trtype); port->disc_addr.trtype);
ret = -EINVAL; ret = -EINVAL;


@@ -277,7 +277,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->has_keyed_sgls) if (ctrl->ops->flags & NVMF_KEYED_SGLS)
id->sgls |= cpu_to_le32(1 << 2); id->sgls |= cpu_to_le32(1 << 2);
if (req->port->inline_data_size) if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20); id->sgls |= cpu_to_le32(1 << 20);


@@ -43,6 +43,17 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_ERR, NULL } { NVMF_OPT_ERR, NULL }
}; };
static int fcloop_verify_addr(substring_t *s)
{
size_t blen = s->to - s->from + 1;
if (strnlen(s->from, blen) != NVME_FC_TRADDR_HEXNAMELEN + 2 ||
strncmp(s->from, "0x", 2))
return -EINVAL;
return 0;
}
static int static int
fcloop_parse_options(struct fcloop_ctrl_options *opts, fcloop_parse_options(struct fcloop_ctrl_options *opts,
const char *buf) const char *buf)
@@ -64,14 +75,16 @@ fcloop_parse_options(struct fcloop_ctrl_options *opts,
opts->mask |= token; opts->mask |= token;
switch (token) { switch (token) {
case NVMF_OPT_WWNN: case NVMF_OPT_WWNN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }
opts->wwnn = token64; opts->wwnn = token64;
break; break;
case NVMF_OPT_WWPN: case NVMF_OPT_WWPN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }
@@ -92,14 +105,16 @@ fcloop_parse_options(struct fcloop_ctrl_options *opts,
opts->fcaddr = token; opts->fcaddr = token;
break; break;
case NVMF_OPT_LPWWNN: case NVMF_OPT_LPWWNN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }
opts->lpwwnn = token64; opts->lpwwnn = token64;
break; break;
case NVMF_OPT_LPWWPN: case NVMF_OPT_LPWWPN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }
@@ -141,14 +156,16 @@ fcloop_parse_nm_options(struct device *dev, u64 *nname, u64 *pname,
token = match_token(p, opt_tokens, args); token = match_token(p, opt_tokens, args);
switch (token) { switch (token) {
case NVMF_OPT_WWNN: case NVMF_OPT_WWNN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }
*nname = token64; *nname = token64;
break; break;
case NVMF_OPT_WWPN: case NVMF_OPT_WWPN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }


@@ -36,7 +36,6 @@ struct nvme_loop_ctrl {
struct nvme_loop_iod async_event_iod; struct nvme_loop_iod async_event_iod;
struct nvme_ctrl ctrl; struct nvme_ctrl ctrl;
struct nvmet_ctrl *target_ctrl;
struct nvmet_port *port; struct nvmet_port *port;
}; };


@@ -286,8 +286,9 @@ struct nvmet_fabrics_ops {
struct module *owner; struct module *owner;
unsigned int type; unsigned int type;
unsigned int msdbd; unsigned int msdbd;
bool has_keyed_sgls : 1; unsigned int flags;
bool metadata_support : 1; #define NVMF_KEYED_SGLS (1 << 0)
#define NVMF_METADATA_SUPPORTED (1 << 1)
void (*queue_response)(struct nvmet_req *req); void (*queue_response)(struct nvmet_req *req);
int (*add_port)(struct nvmet_port *port); int (*add_port)(struct nvmet_port *port);
void (*remove_port)(struct nvmet_port *port); void (*remove_port)(struct nvmet_port *port);


@@ -1970,8 +1970,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.type = NVMF_TRTYPE_RDMA, .type = NVMF_TRTYPE_RDMA,
.msdbd = 1, .msdbd = 1,
.has_keyed_sgls = 1, .flags = NVMF_KEYED_SGLS | NVMF_METADATA_SUPPORTED,
.metadata_support = 1,
.add_port = nvmet_rdma_add_port, .add_port = nvmet_rdma_add_port,
.remove_port = nvmet_rdma_remove_port, .remove_port = nvmet_rdma_remove_port,
.queue_response = nvmet_rdma_queue_response, .queue_response = nvmet_rdma_queue_response,


@@ -459,17 +459,11 @@ static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd)
static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue) static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue)
{ {
struct llist_node *node; struct llist_node *node;
struct nvmet_tcp_cmd *cmd;
node = llist_del_all(&queue->resp_list); for (node = llist_del_all(&queue->resp_list); node; node = node->next) {
if (!node) cmd = llist_entry(node, struct nvmet_tcp_cmd, lentry);
return;
while (node) {
struct nvmet_tcp_cmd *cmd = llist_entry(node,
struct nvmet_tcp_cmd, lentry);
list_add(&cmd->entry, &queue->resp_send_list); list_add(&cmd->entry, &queue->resp_send_list);
node = node->next;
queue->send_list_len++; queue->send_list_len++;
} }
} }
@@ -1717,7 +1711,6 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.type = NVMF_TRTYPE_TCP, .type = NVMF_TRTYPE_TCP,
.msdbd = 1, .msdbd = 1,
.has_keyed_sgls = 0,
.add_port = nvmet_tcp_add_port, .add_port = nvmet_tcp_add_port,
.remove_port = nvmet_tcp_remove_port, .remove_port = nvmet_tcp_remove_port,
.queue_response = nvmet_tcp_queue_response, .queue_response = nvmet_tcp_queue_response,


@@ -319,7 +319,7 @@ dasd_diag_check_device(struct dasd_device *device)
struct dasd_diag_characteristics *rdc_data; struct dasd_diag_characteristics *rdc_data;
struct vtoc_cms_label *label; struct vtoc_cms_label *label;
struct dasd_block *block; struct dasd_block *block;
struct dasd_diag_bio bio; struct dasd_diag_bio *bio;
unsigned int sb, bsize; unsigned int sb, bsize;
blocknum_t end_block; blocknum_t end_block;
int rc; int rc;
@@ -395,29 +395,36 @@ dasd_diag_check_device(struct dasd_device *device)
rc = -ENOMEM; rc = -ENOMEM;
goto out; goto out;
} }
bio = kzalloc(sizeof(*bio), GFP_KERNEL);
if (bio == NULL) {
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
"No memory to allocate initialization bio");
rc = -ENOMEM;
goto out_label;
}
rc = 0; rc = 0;
end_block = 0; end_block = 0;
/* try all sizes - needed for ECKD devices */ /* try all sizes - needed for ECKD devices */
for (bsize = 512; bsize <= PAGE_SIZE; bsize <<= 1) { for (bsize = 512; bsize <= PAGE_SIZE; bsize <<= 1) {
mdsk_init_io(device, bsize, 0, &end_block); mdsk_init_io(device, bsize, 0, &end_block);
memset(&bio, 0, sizeof (struct dasd_diag_bio)); memset(bio, 0, sizeof(*bio));
bio.type = MDSK_READ_REQ; bio->type = MDSK_READ_REQ;
bio.block_number = private->pt_block + 1; bio->block_number = private->pt_block + 1;
bio.buffer = label; bio->buffer = label;
memset(&private->iob, 0, sizeof (struct dasd_diag_rw_io)); memset(&private->iob, 0, sizeof (struct dasd_diag_rw_io));
private->iob.dev_nr = rdc_data->dev_nr; private->iob.dev_nr = rdc_data->dev_nr;
private->iob.key = 0; private->iob.key = 0;
private->iob.flags = 0; /* do synchronous io */ private->iob.flags = 0; /* do synchronous io */
private->iob.block_count = 1; private->iob.block_count = 1;
private->iob.interrupt_params = 0; private->iob.interrupt_params = 0;
private->iob.bio_list = &bio; private->iob.bio_list = bio;
private->iob.flaga = DASD_DIAG_FLAGA_DEFAULT; private->iob.flaga = DASD_DIAG_FLAGA_DEFAULT;
rc = dia250(&private->iob, RW_BIO); rc = dia250(&private->iob, RW_BIO);
if (rc == 3) { if (rc == 3) {
pr_warn("%s: A 64-bit DIAG call failed\n", pr_warn("%s: A 64-bit DIAG call failed\n",
dev_name(&device->cdev->dev)); dev_name(&device->cdev->dev));
rc = -EOPNOTSUPP; rc = -EOPNOTSUPP;
goto out_label; goto out_bio;
} }
mdsk_term_io(device); mdsk_term_io(device);
if (rc == 0) if (rc == 0)
@@ -427,7 +434,7 @@ dasd_diag_check_device(struct dasd_device *device)
pr_warn("%s: Accessing the DASD failed because of an incorrect format (rc=%d)\n", pr_warn("%s: Accessing the DASD failed because of an incorrect format (rc=%d)\n",
dev_name(&device->cdev->dev), rc); dev_name(&device->cdev->dev), rc);
rc = -EIO; rc = -EIO;
goto out_label; goto out_bio;
} }
/* check for label block */ /* check for label block */
if (memcmp(label->label_id, DASD_DIAG_CMS1, if (memcmp(label->label_id, DASD_DIAG_CMS1,
@@ -457,6 +464,8 @@ dasd_diag_check_device(struct dasd_device *device)
(rc == 4) ? ", read-only device" : ""); (rc == 4) ? ", read-only device" : "");
rc = 0; rc = 0;
} }
out_bio:
kfree(bio);
out_label: out_label:
free_page((long) label); free_page((long) label);
out: out:
@@ -506,7 +515,7 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev,
struct req_iterator iter; struct req_iterator iter;
struct bio_vec bv; struct bio_vec bv;
char *dst; char *dst;
unsigned int count, datasize; unsigned int count;
sector_t recid, first_rec, last_rec; sector_t recid, first_rec, last_rec;
unsigned int blksize, off; unsigned int blksize, off;
unsigned char rw_cmd; unsigned char rw_cmd;
@@ -534,10 +543,8 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev,
if (count != last_rec - first_rec + 1) if (count != last_rec - first_rec + 1)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
/* Build the request */ /* Build the request */
datasize = sizeof(struct dasd_diag_req) + cqr = dasd_smalloc_request(DASD_DIAG_MAGIC, 0, struct_size(dreq, bio, count),
count*sizeof(struct dasd_diag_bio); memdev, blk_mq_rq_to_pdu(req));
cqr = dasd_smalloc_request(DASD_DIAG_MAGIC, 0, datasize, memdev,
blk_mq_rq_to_pdu(req));
if (IS_ERR(cqr)) if (IS_ERR(cqr))
return cqr; return cqr;
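
struct_size() used above comes from <linux/overflow.h>: it evaluates to sizeof(*ptr) plus count elements of the flexible array member, saturating rather than wrapping on overflow, which is why the hand-rolled datasize arithmetic can be dropped. A small self-contained sketch of the same allocation shape (placeholder types, loosely mirroring dasd_diag_req):

#include <linux/types.h>
#include <linux/overflow.h>
#include <linux/slab.h>

struct sketch_bio {			/* stand-in for struct dasd_diag_bio */
	u8 type;
	u64 block_number;
	void *buffer;
};

struct sketch_diag_req {
	unsigned int block_count;
	struct sketch_bio bio[];	/* flexible array member */
};

static struct sketch_diag_req *sketch_alloc_req(unsigned int count)
{
	struct sketch_diag_req *dreq;

	/* sizeof(*dreq) + count * sizeof(dreq->bio[0]), overflow-checked */
	dreq = kzalloc(struct_size(dreq, bio, count), GFP_KERNEL);
	if (dreq)
		dreq->block_count = count;
	return dreq;
}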


@@ -59,6 +59,7 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
zone.non_seq = 1; zone.non_seq = 1;
zone.len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8])); zone.len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
zone.capacity = zone.len;
zone.start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16])); zone.start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16]));
zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24])); zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));
if (zone.type != ZBC_ZONE_TYPE_CONV && if (zone.type != ZBC_ZONE_TYPE_CONV &&
@@ -716,6 +717,11 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
/* The drive satisfies the kernel restrictions: set it up */ /* The drive satisfies the kernel restrictions: set it up */
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
if (sdkp->zones_max_open == U32_MAX)
blk_queue_max_open_zones(q, 0);
else
blk_queue_max_open_zones(q, sdkp->zones_max_open);
blk_queue_max_active_zones(q, 0);
nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks); nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
/* READ16/WRITE16 is mandatory for ZBC disks */ /* READ16/WRITE16 is mandatory for ZBC disks */


@@ -513,6 +513,8 @@ struct request_queue {
unsigned int nr_zones; unsigned int nr_zones;
unsigned long *conv_zones_bitmap; unsigned long *conv_zones_bitmap;
unsigned long *seq_zones_wlock; unsigned long *seq_zones_wlock;
unsigned int max_open_zones;
unsigned int max_active_zones;
#endif /* CONFIG_BLK_DEV_ZONED */ #endif /* CONFIG_BLK_DEV_ZONED */
/* /*
@@ -722,6 +724,28 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
return true; return true;
return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
} }
static inline void blk_queue_max_open_zones(struct request_queue *q,
unsigned int max_open_zones)
{
q->max_open_zones = max_open_zones;
}
static inline unsigned int queue_max_open_zones(const struct request_queue *q)
{
return q->max_open_zones;
}
static inline void blk_queue_max_active_zones(struct request_queue *q,
unsigned int max_active_zones)
{
q->max_active_zones = max_active_zones;
}
static inline unsigned int queue_max_active_zones(const struct request_queue *q)
{
return q->max_active_zones;
}
#else /* CONFIG_BLK_DEV_ZONED */ #else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int blk_queue_nr_zones(struct request_queue *q) static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
{ {
@@ -737,6 +761,14 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
{ {
return 0; return 0;
} }
static inline unsigned int queue_max_open_zones(const struct request_queue *q)
{
return 0;
}
static inline unsigned int queue_max_active_zones(const struct request_queue *q)
{
return 0;
}
#endif /* CONFIG_BLK_DEV_ZONED */ #endif /* CONFIG_BLK_DEV_ZONED */
static inline bool rq_is_sync(struct request *rq) static inline bool rq_is_sync(struct request *rq)
@@ -1519,6 +1551,24 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev)
return 0; return 0;
} }
static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
if (q)
return queue_max_open_zones(q);
return 0;
}
static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
if (q)
return queue_max_active_zones(q);
return 0;
}
static inline int queue_dma_alignment(const struct request_queue *q) static inline int queue_dma_alignment(const struct request_queue *q)
{ {
return q ? q->dma_alignment : 511; return q ? q->dma_alignment : 511;
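
A brief sketch of how the new limits are meant to be used: a zoned driver publishes them at probe time (the setters exist only under CONFIG_BLK_DEV_ZONED), and other kernel code, or the sysfs attributes added earlier in this series, reads them back. Function names below are illustrative.

#include <linux/blkdev.h>
#include <linux/printk.h>

/* Driver side: 0 means "no limit" for both attributes. */
static void sketch_set_zone_resources(struct request_queue *q,
				      unsigned int max_open,
				      unsigned int max_active)
{
	blk_queue_max_open_zones(q, max_open);
	blk_queue_max_active_zones(q, max_active);
}

/* Consumer side: the bdev helpers return 0 when the queue is absent. */
static void sketch_log_zone_resources(struct block_device *bdev)
{
	pr_info("max open %u, max active %u\n",
		bdev_max_open_zones(bdev), bdev_max_active_zones(bdev));
}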


@@ -132,6 +132,7 @@ enum {
#define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff) #define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff)
#define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) #define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf)
#define NVME_CAP_NSSRC(cap) (((cap) >> 36) & 0x1) #define NVME_CAP_NSSRC(cap) (((cap) >> 36) & 0x1)
#define NVME_CAP_CSS(cap) (((cap) >> 37) & 0xff)
#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) #define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf)
#define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf) #define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf)
@@ -162,7 +163,6 @@ enum {
enum { enum {
NVME_CC_ENABLE = 1 << 0, NVME_CC_ENABLE = 1 << 0,
NVME_CC_CSS_NVM = 0 << 4,
NVME_CC_EN_SHIFT = 0, NVME_CC_EN_SHIFT = 0,
NVME_CC_CSS_SHIFT = 4, NVME_CC_CSS_SHIFT = 4,
NVME_CC_MPS_SHIFT = 7, NVME_CC_MPS_SHIFT = 7,
@@ -170,6 +170,9 @@ enum {
NVME_CC_SHN_SHIFT = 14, NVME_CC_SHN_SHIFT = 14,
NVME_CC_IOSQES_SHIFT = 16, NVME_CC_IOSQES_SHIFT = 16,
NVME_CC_IOCQES_SHIFT = 20, NVME_CC_IOCQES_SHIFT = 20,
NVME_CC_CSS_NVM = 0 << NVME_CC_CSS_SHIFT,
NVME_CC_CSS_CSI = 6 << NVME_CC_CSS_SHIFT,
NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT,
NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT,
@@ -179,6 +182,8 @@ enum {
NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
NVME_CAP_CSS_NVM = 1 << 0,
NVME_CAP_CSS_CSI = 1 << 6,
NVME_CSTS_RDY = 1 << 0, NVME_CSTS_RDY = 1 << 0,
NVME_CSTS_CFS = 1 << 1, NVME_CSTS_CFS = 1 << 1,
NVME_CSTS_NSSRO = 1 << 4, NVME_CSTS_NSSRO = 1 << 4,
@@ -369,11 +374,37 @@ struct nvme_id_ns {
__u8 vs[3712]; __u8 vs[3712];
}; };
struct nvme_zns_lbafe {
__le64 zsze;
__u8 zdes;
__u8 rsvd9[7];
};
struct nvme_id_ns_zns {
__le16 zoc;
__le16 ozcs;
__le32 mar;
__le32 mor;
__le32 rrl;
__le32 frl;
__u8 rsvd20[2796];
struct nvme_zns_lbafe lbafe[16];
__u8 rsvd3072[768];
__u8 vs[256];
};
struct nvme_id_ctrl_zns {
__u8 zasl;
__u8 rsvd1[4095];
};
enum { enum {
NVME_ID_CNS_NS = 0x00, NVME_ID_CNS_NS = 0x00,
NVME_ID_CNS_CTRL = 0x01, NVME_ID_CNS_CTRL = 0x01,
NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, NVME_ID_CNS_NS_ACTIVE_LIST = 0x02,
NVME_ID_CNS_NS_DESC_LIST = 0x03, NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_NS_PRESENT = 0x11,
NVME_ID_CNS_CTRL_NS_LIST = 0x12, NVME_ID_CNS_CTRL_NS_LIST = 0x12,
@@ -383,6 +414,11 @@ enum {
NVME_ID_CNS_UUID_LIST = 0x17, NVME_ID_CNS_UUID_LIST = 0x17,
}; };
enum {
NVME_CSI_NVM = 0,
NVME_CSI_ZNS = 2,
};
enum { enum {
NVME_DIR_IDENTIFY = 0x00, NVME_DIR_IDENTIFY = 0x00,
NVME_DIR_STREAMS = 0x01, NVME_DIR_STREAMS = 0x01,
@@ -435,11 +471,13 @@ struct nvme_ns_id_desc {
#define NVME_NIDT_EUI64_LEN 8 #define NVME_NIDT_EUI64_LEN 8
#define NVME_NIDT_NGUID_LEN 16 #define NVME_NIDT_NGUID_LEN 16
#define NVME_NIDT_UUID_LEN 16 #define NVME_NIDT_UUID_LEN 16
#define NVME_NIDT_CSI_LEN 1
enum { enum {
NVME_NIDT_EUI64 = 0x01, NVME_NIDT_EUI64 = 0x01,
NVME_NIDT_NGUID = 0x02, NVME_NIDT_NGUID = 0x02,
NVME_NIDT_UUID = 0x03, NVME_NIDT_UUID = 0x03,
NVME_NIDT_CSI = 0x04,
}; };
struct nvme_smart_log { struct nvme_smart_log {
@@ -519,6 +557,27 @@ struct nvme_ana_rsp_hdr {
__le16 rsvd10[3]; __le16 rsvd10[3];
}; };
struct nvme_zone_descriptor {
__u8 zt;
__u8 zs;
__u8 za;
__u8 rsvd3[5];
__le64 zcap;
__le64 zslba;
__le64 wp;
__u8 rsvd32[32];
};
enum {
NVME_ZONE_TYPE_SEQWRITE_REQ = 0x2,
};
struct nvme_zone_report {
__le64 nr_zones;
__u8 resv8[56];
struct nvme_zone_descriptor entries[];
};
enum { enum {
NVME_SMART_CRIT_SPARE = 1 << 0, NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1, NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
@@ -613,6 +672,9 @@ enum nvme_opcode {
nvme_cmd_resv_report = 0x0e, nvme_cmd_resv_report = 0x0e,
nvme_cmd_resv_acquire = 0x11, nvme_cmd_resv_acquire = 0x11,
nvme_cmd_resv_release = 0x15, nvme_cmd_resv_release = 0x15,
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
nvme_cmd_zone_append = 0x7d,
}; };
#define nvme_opcode_name(opcode) { opcode, #opcode } #define nvme_opcode_name(opcode) { opcode, #opcode }
@@ -751,6 +813,7 @@ struct nvme_rw_command {
enum { enum {
NVME_RW_LR = 1 << 15, NVME_RW_LR = 1 << 15,
NVME_RW_FUA = 1 << 14, NVME_RW_FUA = 1 << 14,
NVME_RW_APPEND_PIREMAP = 1 << 9,
NVME_RW_DSM_FREQ_UNSPEC = 0, NVME_RW_DSM_FREQ_UNSPEC = 0,
NVME_RW_DSM_FREQ_TYPICAL = 1, NVME_RW_DSM_FREQ_TYPICAL = 1,
NVME_RW_DSM_FREQ_RARE = 2, NVME_RW_DSM_FREQ_RARE = 2,
@@ -816,6 +879,53 @@ struct nvme_write_zeroes_cmd {
__le16 appmask; __le16 appmask;
}; };
enum nvme_zone_mgmt_action {
NVME_ZONE_CLOSE = 0x1,
NVME_ZONE_FINISH = 0x2,
NVME_ZONE_OPEN = 0x3,
NVME_ZONE_RESET = 0x4,
NVME_ZONE_OFFLINE = 0x5,
NVME_ZONE_SET_DESC_EXT = 0x10,
};
struct nvme_zone_mgmt_send_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__le32 cdw2[2];
__le64 metadata;
union nvme_data_ptr dptr;
__le64 slba;
__le32 cdw12;
__u8 zsa;
__u8 select_all;
__u8 rsvd13[2];
__le32 cdw14[2];
};
struct nvme_zone_mgmt_recv_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__le64 rsvd2[2];
union nvme_data_ptr dptr;
__le64 slba;
__le32 numd;
__u8 zra;
__u8 zrasf;
__u8 pr;
__u8 rsvd13;
__le32 cdw14[2];
};
enum {
NVME_ZRA_ZONE_REPORT = 0,
NVME_ZRASF_ZONE_REPORT_ALL = 0,
NVME_REPORT_ZONE_PARTIAL = 1,
};
/* Features */ /* Features */
enum { enum {
@@ -972,7 +1082,9 @@ struct nvme_identify {
__u8 cns; __u8 cns;
__u8 rsvd3; __u8 rsvd3;
__le16 ctrlid; __le16 ctrlid;
__u32 rsvd11[5]; __u8 rsvd11[3];
__u8 csi;
__u32 rsvd12[4];
}; };
#define NVME_IDENTIFY_DATA_SIZE 4096 #define NVME_IDENTIFY_DATA_SIZE 4096
@@ -1086,7 +1198,9 @@ struct nvme_get_log_page_command {
}; };
__le64 lpo; __le64 lpo;
}; };
__u32 rsvd14[2]; __u8 rsvd14[3];
__u8 csi;
__u32 rsvd15;
}; };
struct nvme_directive_cmd { struct nvme_directive_cmd {
@@ -1283,6 +1397,8 @@ struct nvme_command {
struct nvme_format_cmd format; struct nvme_format_cmd format;
struct nvme_dsm_cmd dsm; struct nvme_dsm_cmd dsm;
struct nvme_write_zeroes_cmd write_zeroes; struct nvme_write_zeroes_cmd write_zeroes;
struct nvme_zone_mgmt_send_cmd zms;
struct nvme_zone_mgmt_recv_cmd zmr;
struct nvme_abort_cmd abort; struct nvme_abort_cmd abort;
struct nvme_get_log_page_command get_log_page; struct nvme_get_log_page_command get_log_page;
struct nvmf_common_command fabrics; struct nvmf_common_command fabrics;
@@ -1416,6 +1532,18 @@ enum {
NVME_SC_DISCOVERY_RESTART = 0x190, NVME_SC_DISCOVERY_RESTART = 0x190,
NVME_SC_AUTH_REQUIRED = 0x191, NVME_SC_AUTH_REQUIRED = 0x191,
/*
* I/O Command Set Specific - Zoned commands:
*/
NVME_SC_ZONE_BOUNDARY_ERROR = 0x1b8,
NVME_SC_ZONE_FULL = 0x1b9,
NVME_SC_ZONE_READ_ONLY = 0x1ba,
NVME_SC_ZONE_OFFLINE = 0x1bb,
NVME_SC_ZONE_INVALID_WRITE = 0x1bc,
NVME_SC_ZONE_TOO_MANY_ACTIVE = 0x1bd,
NVME_SC_ZONE_TOO_MANY_OPEN = 0x1be,
NVME_SC_ZONE_INVALID_TRANSITION = 0x1bf,
/* /*
* Media and Data Integrity Errors: * Media and Data Integrity Errors:
*/ */


@@ -73,6 +73,15 @@ enum blk_zone_cond {
BLK_ZONE_COND_OFFLINE = 0xF, BLK_ZONE_COND_OFFLINE = 0xF,
}; };
/**
* enum blk_zone_report_flags - Feature flags of reported zone descriptors.
*
* @BLK_ZONE_REP_CAPACITY: Zone descriptor has capacity field.
*/
enum blk_zone_report_flags {
BLK_ZONE_REP_CAPACITY = (1 << 0),
};
/** /**
* struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl. * struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl.
* *
@@ -99,7 +108,9 @@ struct blk_zone {
__u8 cond; /* Zone condition */ __u8 cond; /* Zone condition */
__u8 non_seq; /* Non-sequential write resources active */ __u8 non_seq; /* Non-sequential write resources active */
__u8 reset; /* Reset write pointer recommended */ __u8 reset; /* Reset write pointer recommended */
__u8 reserved[36]; __u8 resv[4];
__u64 capacity; /* Zone capacity in number of sectors */
__u8 reserved[24];
}; };
/** /**
@@ -115,7 +126,7 @@ struct blk_zone {
struct blk_zone_report { struct blk_zone_report {
__u64 sector; __u64 sector;
__u32 nr_zones; __u32 nr_zones;
__u8 reserved[4]; __u32 flags;
struct blk_zone zones[0]; struct blk_zone zones[0];
}; };
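
For completeness, a userspace sketch (compiles against the updated header; the device path is the only argument) showing how the new report flags and per-zone capacity surface through the existing BLKREPORTZONE ioctl:

#include <linux/blkzoned.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	struct blk_zone_report *rep;
	int fd = -1, ret = 1;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <zoned block device>\n", argv[0]);
		return 1;
	}

	/* Room for the report header plus a single zone descriptor. */
	rep = calloc(1, sizeof(*rep) + sizeof(struct blk_zone));
	fd = open(argv[1], O_RDONLY);
	if (!rep || fd < 0)
		goto out;

	rep->sector = 0;	/* start of the device */
	rep->nr_zones = 1;	/* ask for one zone; kernel updates this */
	if (!ioctl(fd, BLKREPORTZONE, rep) && rep->nr_zones == 1) {
		if (rep->flags & BLK_ZONE_REP_CAPACITY)
			printf("zone 0: len %llu sectors, capacity %llu sectors\n",
			       (unsigned long long)rep->zones[0].len,
			       (unsigned long long)rep->zones[0].capacity);
		else
			printf("zone 0: len %llu sectors (no capacity reported)\n",
			       (unsigned long long)rep->zones[0].len);
		ret = 0;
	}
out:
	if (fd >= 0)
		close(fd);
	free(rep);
	return ret;
}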