Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
 "This is the main pull request for block IO related changes for the 4.16 kernel. Nothing major in this pull request, but a good amount of improvements and fixes all over the map. This contains:

  - BFQ improvements, fixes, and cleanups from Angelo, Chiara, and Paolo.

  - Support for SMR zones for deadline and mq-deadline from Damien and Christoph.

  - Set of fixes for bcache by way of Michael Lyle, including fixes from himself, Kent, Rui, Tang, and Coly.

  - Series from Matias for lightnvm with fixes from Hans Holmberg, Javier, and Matias. Mostly centered around pblk, and removing rrpc 1.2 in preparation for supporting 2.0.

  - A couple of NVMe pull requests from Christoph. Nothing major in here, just fixes and cleanups, and support for command tracing from Johannes.

  - Support in blk-throttle for tracking reads and writes separately. From Joseph Qi. A few cleanups/fixes also for blk-throttle from Weiping.

  - Series from Mike Snitzer that enables dm to register its queue more logically, something that's always been problematic on dm since it's a stacked device.

  - Series from Ming cleaning up some of the bio accessor use, in preparation for supporting multipage bvecs.

  - Various fixes from Ming closing up holes around queue mapping and quiescing.

  - BSD partition fix from Richard Narron, fixing a problem where we can't mount newer (10/11) FreeBSD partitions.

  - Series from Tejun reworking blk-mq timeout handling. The previous scheme relied on atomic bits, but it had races where we would think a request had timed out if it got reused at the wrong time.

  - null_blk now supports faking timeouts, to enable us to better exercise and test that functionality separately. From me.

  - Kill the separate atomic poll bit in the request struct. After this, we don't use the atomic bits on blk-mq anymore at all. From me.

  - sgl_alloc/free helpers from Bart.

  - Heavily contended tag case scalability improvement from me.

  - Various little fixes and cleanups from Arnd, Bart, Corentin, Douglas, Eryu, Goldwyn, and myself"

* 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits)
  block: remove smart1,2.h
  nvme: add tracepoint for nvme_complete_rq
  nvme: add tracepoint for nvme_setup_cmd
  nvme-pci: introduce RECONNECTING state to mark initializing procedure
  nvme-rdma: remove redundant boolean for inline_data
  nvme: don't free uuid pointer before printing it
  nvme-pci: Suspend queues after deleting them
  bsg: use pr_debug instead of hand crafted macros
  blk-mq-debugfs: don't allow write on attributes with seq_operations set
  nvme-pci: Fix queue double allocations
  block: Set BIO_TRACE_COMPLETION on new bio during split
  blk-throttle: use queue_is_rq_based
  block: Remove kblockd_schedule_delayed_work{,_on}()
  blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays
  blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly()
  lib/scatterlist: Fix chaining support in sgl_alloc_order()
  blk-throttle: track read and write request individually
  block: add bdev_read_only() checks to common helpers
  block: fail op_is_write() requests to read-only partitions
  blk-throttle: export io_serviced_recursive, io_service_bytes_recursive
  ...
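As a rough illustration of the new sgl_alloc/sgl_free helpers mentioned above, a caller can now build a scatterlist covering a byte range without open-coding the page allocation loop. This is only a sketch: the surrounding code is hypothetical, and the signatures are paraphrased from the lib/scatterlist changes in this series rather than quoted from them.

        size_t len = 64 * 1024;
        unsigned int nents;
        struct scatterlist *sgl;

        /* allocate backing pages plus an SG list describing 'len' bytes */
        sgl = sgl_alloc(len, GFP_KERNEL, &nents);
        if (!sgl)
                return -ENOMEM;

        /* ... hand sgl/nents to DMA mapping or a target driver ... */

        sgl_free(sgl);          /* releases both the pages and the SG list */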
@@ -27,6 +27,8 @@
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
#include <linux/blkzoned.h>
#include <linux/seqlock.h>
#include <linux/u64_stats_sync.h>

struct module;
struct scsi_ioctl_command;
@@ -121,6 +123,12 @@ typedef __u32 __bitwise req_flags_t;
/* Look at ->special_vec for the actual data payload instead of the
   bio chain. */
#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
/* The per-zone write lock is held for this request */
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
/* timeout is expired */
#define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20))
/* already slept for hybrid poll */
#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 21))

/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
@@ -133,12 +141,6 @@ typedef __u32 __bitwise req_flags_t;
 * especially blk_mq_rq_ctx_init() to take care of the added fields.
 */
struct request {
        struct list_head queuelist;
        union {
                struct __call_single_data csd;
                u64 fifo_time;
        };

        struct request_queue *q;
        struct blk_mq_ctx *mq_ctx;

@@ -148,8 +150,6 @@ struct request {

        int internal_tag;

        unsigned long atomic_flags;

        /* the following two fields are internal, NEVER access directly */
        unsigned int __data_len;        /* total data len */
        int tag;
@@ -158,6 +158,8 @@ struct request {
        struct bio *bio;
        struct bio *biotail;

        struct list_head queuelist;

        /*
         * The hash is used inside the scheduler, and killed once the
         * request reaches the dispatch list. The ipi_list is only used
@@ -205,19 +207,16 @@ struct request {
        struct hd_struct *part;
        unsigned long start_time;
        struct blk_issue_stat issue_stat;
#ifdef CONFIG_BLK_CGROUP
        struct request_list *rl;        /* rl this rq is alloced from */
        unsigned long long start_time_ns;
        unsigned long long io_start_time_ns;    /* when passed to hardware */
#endif
        /* Number of scatter-gather DMA addr+len pairs after
         * physical address coalescing is performed.
         */
        unsigned short nr_phys_segments;

#if defined(CONFIG_BLK_DEV_INTEGRITY)
        unsigned short nr_integrity_segments;
#endif

        unsigned short write_hint;
        unsigned short ioprio;

        unsigned int timeout;
@@ -226,11 +225,37 @@ struct request {

        unsigned int extra_len; /* length of alignment and padding */

        unsigned short write_hint;
        /*
         * On blk-mq, the lower bits of ->gstate (generation number and
         * state) carry the MQ_RQ_* state value and the upper bits the
         * generation number which is monotonically incremented and used to
         * distinguish the reuse instances.
         *
         * ->gstate_seq allows updates to ->gstate and other fields
         * (currently ->deadline) during request start to be read
         * atomically from the timeout path, so that it can operate on a
         * coherent set of information.
         */
        seqcount_t gstate_seq;
        u64 gstate;

        /*
         * ->aborted_gstate is used by the timeout to claim a specific
         * recycle instance of this request. See blk_mq_timeout_work().
         */
        struct u64_stats_sync aborted_gstate_sync;
        u64 aborted_gstate;

        /* access through blk_rq_set_deadline, blk_rq_deadline */
        unsigned long __deadline;

        unsigned long deadline;
        struct list_head timeout_list;

        union {
                struct __call_single_data csd;
                u64 fifo_time;
        };

        /*
         * completion callback.
         */
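The ->gstate_seq / ->gstate comment above is the heart of the timeout rework: the generation number lets the timeout path tell a reused request apart from the one it armed a timer for. Below is a minimal sketch of the reader side, roughly the seqcount pattern the timeout path can use to get a coherent (gstate, deadline) pair; the surrounding code and variable names are illustrative, not the exact blk-mq implementation.

        unsigned int start;
        unsigned long deadline;
        u64 gstate;

        /* retry until both fields come from the same writer-side update */
        do {
                start = read_seqcount_begin(&rq->gstate_seq);
                gstate = READ_ONCE(rq->gstate);
                deadline = blk_rq_deadline(rq); /* accessor for ->__deadline */
        } while (read_seqcount_retry(&rq->gstate_seq, start));

The writer side (request start) brackets its updates to ->gstate and ->__deadline with write_seqcount_begin()/write_seqcount_end() on ->gstate_seq.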
@@ -239,6 +264,12 @@ struct request {

        /* for bidi */
        struct request *next_rq;

#ifdef CONFIG_BLK_CGROUP
        struct request_list *rl;        /* rl this rq is alloced from */
        unsigned long long start_time_ns;
        unsigned long long io_start_time_ns;    /* when passed to hardware */
#endif
};

static inline bool blk_op_is_scsi(unsigned int op)
@@ -563,6 +594,22 @@ struct request_queue {

        struct queue_limits limits;

        /*
         * Zoned block device information for request dispatch control.
         * nr_zones is the total number of zones of the device. This is always
         * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones
         * bits which indicates if a zone is conventional (bit clear) or
         * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones
         * bits which indicates if a zone is write locked, that is, if a write
         * request targeting the zone was dispatched. All three fields are
         * initialized by the low level device driver (e.g. scsi/sd.c).
         * Stacking drivers (device mappers) may or may not initialize
         * these fields.
         */
        unsigned int nr_zones;
        unsigned long *seq_zones_bitmap;
        unsigned long *seq_zones_wlock;

        /*
         * sg stuff
         */
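For context, here is a hedged sketch of the initialization the comment above expects from a low level driver: nr_zones plus the two bitmaps, sized in longs, with the sequential zones marked. The variable names (nr_zones, first_seq_zone) and the error handling are made up for the example; a real driver derives the zone layout from a zone report.

        q->nr_zones = nr_zones;
        q->seq_zones_bitmap = kcalloc(BITS_TO_LONGS(nr_zones),
                                      sizeof(unsigned long), GFP_KERNEL);
        q->seq_zones_wlock = kcalloc(BITS_TO_LONGS(nr_zones),
                                     sizeof(unsigned long), GFP_KERNEL);
        if (!q->seq_zones_bitmap || !q->seq_zones_wlock)
                return -ENOMEM;

        /* bit set == sequential zone, bit clear == conventional zone */
        for (i = first_seq_zone; i < nr_zones; i++)
                set_bit(i, q->seq_zones_bitmap);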
@@ -807,6 +854,27 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
        return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
}

static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
{
        return q->nr_zones;
}

static inline unsigned int blk_queue_zone_no(struct request_queue *q,
                                             sector_t sector)
{
        if (!blk_queue_is_zoned(q))
                return 0;
        return sector >> ilog2(q->limits.chunk_sectors);
}

static inline bool blk_queue_zone_is_seq(struct request_queue *q,
                                         sector_t sector)
{
        if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap)
                return false;
        return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
}

static inline bool rq_is_sync(struct request *rq)
{
        return op_is_sync(rq->cmd_flags);
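Because chunk_sectors holds the zone size and zone sizes are a power-of-two number of sectors, the zone lookup above is a single shift. A tiny usage example with hypothetical numbers, assuming q is a zoned queue with 256 MiB zones (chunk_sectors == 524288, so ilog2() == 19):

        unsigned int zno = blk_queue_zone_no(q, 1048576);      /* 1048576 >> 19 == 2 */
        bool seq = blk_queue_zone_is_seq(q, 1048576);          /* true if bit 2 is set in seq_zones_bitmap */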
@@ -1046,6 +1114,16 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
        return blk_rq_cur_bytes(rq) >> 9;
}

static inline unsigned int blk_rq_zone_no(struct request *rq)
{
        return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
}

static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
{
        return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
}

/*
 * Some commands like WRITE SAME have a payload or data transfer size which
 * is different from the size of the request. Any driver that supports such
@@ -1595,7 +1673,15 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)

        if (q)
                return blk_queue_zone_sectors(q);
        return 0;
}

static inline unsigned int bdev_nr_zones(struct block_device *bdev)
{
        struct request_queue *q = bdev_get_queue(bdev);

        if (q)
                return blk_queue_nr_zones(q);
        return 0;
}

@@ -1731,8 +1817,6 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio)

int kblockd_schedule_work(struct work_struct *work);
int kblockd_schedule_work_on(int cpu, struct work_struct *work);
int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);

#ifdef CONFIG_BLK_CGROUP
@@ -1971,6 +2055,60 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
extern int bdev_read_page(struct block_device *, sector_t, struct page *);
extern int bdev_write_page(struct block_device *, sector_t, struct page *,
                           struct writeback_control *);

#ifdef CONFIG_BLK_DEV_ZONED
bool blk_req_needs_zone_write_lock(struct request *rq);
void __blk_req_zone_write_lock(struct request *rq);
void __blk_req_zone_write_unlock(struct request *rq);

static inline void blk_req_zone_write_lock(struct request *rq)
{
        if (blk_req_needs_zone_write_lock(rq))
                __blk_req_zone_write_lock(rq);
}

static inline void blk_req_zone_write_unlock(struct request *rq)
{
        if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
                __blk_req_zone_write_unlock(rq);
}

static inline bool blk_req_zone_is_write_locked(struct request *rq)
{
        return rq->q->seq_zones_wlock &&
               test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
}

static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
{
        if (!blk_req_needs_zone_write_lock(rq))
                return true;
        return !blk_req_zone_is_write_locked(rq);
}
#else
static inline bool blk_req_needs_zone_write_lock(struct request *rq)
{
        return false;
}

static inline void blk_req_zone_write_lock(struct request *rq)
{
}

static inline void blk_req_zone_write_unlock(struct request *rq)
{
}
static inline bool blk_req_zone_is_write_locked(struct request *rq)
{
        return false;
}

static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
{
        return true;
}
#endif /* CONFIG_BLK_DEV_ZONED */

#else /* CONFIG_BLOCK */

struct block_device;
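To close the loop on the zoned additions, here is a hedged sketch of how a dispatch path is meant to use the helpers above. mq-deadline is adapted along these lines in this series, but the snippet below is illustrative control flow, not the scheduler's actual code.

        /* skip a write whose target zone already has a write in flight */
        if (!blk_req_can_dispatch_to_zone(rq))
                return NULL;

        /*
         * Take the per-zone write lock; this is a no-op for reads and for
         * conventional zones, where blk_req_needs_zone_write_lock() is false.
         */
        blk_req_zone_write_lock(rq);

        /* ... issue rq to the driver ... */

        /* on completion (or requeue) release the zone for the next write */
        blk_req_zone_write_unlock(rq);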