Merge branch 'for-4.12/block' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe:

 - Add BFQ IO scheduler under the new blk-mq scheduling framework. BFQ
   was initially a fork of CFQ, but subsequently changed to implement
   fairness based on B-WF2Q+, a modified variant of WF2Q. BFQ is meant
   to be used on desktop type single drives, providing good fairness.
   From Paolo.

 - Add Kyber IO scheduler. This is a full multiqueue aware scheduler,
   using a scalable token based algorithm that throttles IO based on
   live completion IO stats, similary to blk-wbt. From Omar.

 - A series from Jan, moving users to separately allocated backing
   devices. This continues the work of separating backing device life
   times, solving various problems with hot removal.

 - A series of updates for lightnvm, mostly from Javier. Includes a
   'pblk' target that exposes an open channel SSD as a physical block
   device.

 - A series of fixes and improvements for nbd from Josef.

 - A series from Omar, removing queue sharing between devices on mostly
   legacy drivers. This helps us clean up other bits, if we know that a
   queue only has a single device backing. This has been overdue for
   more than a decade.

 - Fixes for the blk-stats, and improvements to unify the stats and user
   windows. This both improves blk-wbt, and enables other users to
   register a need to receive IO stats for a device. From Omar.

 - blk-throttle improvements from Shaohua. This provides a scalable
   framework for implementing scalable priotization - particularly for
   blk-mq, but applicable to any type of block device. The interface is
   marked experimental for now.

 - Bucketized IO stats for IO polling from Stephen Bates. This improves
   efficiency of polled workloads in the presence of mixed block size
   IO.

 - A few fixes for opal, from Scott.

 - A few pulls for NVMe, including a lot of fixes for NVMe-over-fabrics.
   From a variety of folks, mostly Sagi and James Smart.

 - A series from Bart, improving our exposed info and capabilities from
   the blk-mq debugfs support.

 - A series from Christoph, cleaning up how handle WRITE_ZEROES.

 - A series from Christoph, cleaning up the block layer handling of how
   we track errors in a request. On top of being a nice cleanup, it also
   shrinks the size of struct request a bit.

 - Removal of mg_disk and hd (sorry Linus) by Christoph. The former was
   never used by platforms, and the latter has outlived it's usefulness.

 - Various little bug fixes and cleanups from a wide variety of folks.

* 'for-4.12/block' of git://git.kernel.dk/linux-block: (329 commits)
  block: hide badblocks attribute by default
  blk-mq: unify hctx delay_work and run_work
  block: add kblock_mod_delayed_work_on()
  blk-mq: unify hctx delayed_run_work and run_work
  nbd: fix use after free on module unload
  MAINTAINERS: bfq: Add Paolo as maintainer for the BFQ I/O scheduler
  blk-mq-sched: alloate reserved tags out of normal pool
  mtip32xx: use runtime tag to initialize command header
  scsi: Implement blk_mq_ops.show_rq()
  blk-mq: Add blk_mq_ops.show_rq()
  blk-mq: Show operation, cmd_flags and rq_flags names
  blk-mq: Make blk_flags_show() callers append a newline character
  blk-mq: Move the "state" debugfs attribute one level down
  blk-mq: Unregister debugfs attributes earlier
  blk-mq: Only unregister hctxs for which registration succeeded
  blk-mq-debugfs: Rename functions for registering and unregistering the mq directory
  blk-mq: Let blk_mq_debugfs_register() look up the queue name
  blk-mq: Register <dev>/queue/mq after having registered <dev>/queue
  ide-pm: always pass 0 error to ide_complete_rq in ide_do_devset
  ide-pm: always pass 0 error to __blk_end_request_all
  ..
This commit is contained in:
Linus Torvalds
2017-05-01 10:39:57 -07:00
255 changed files with 24703 additions and 6212 deletions

View File

@@ -40,15 +40,20 @@ struct blkcg_gq;
struct blk_flush_queue;
struct pr_ops;
struct rq_wb;
struct blk_queue_stats;
struct blk_stat_callback;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
/* Must be consisitent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16
/*
* Maximum number of blkcg policies allowed to be registered concurrently.
* Defined here to simplify include dependency.
*/
#define BLKCG_MAX_POLS 2
#define BLKCG_MAX_POLS 3
typedef void (rq_end_io_fn)(struct request *, int);
@@ -173,6 +178,7 @@ struct request {
struct rb_node rb_node; /* sort/lookup */
struct bio_vec special_vec;
void *completion_data;
int error_count; /* for legacy drivers, don't use */
};
/*
@@ -213,16 +219,14 @@ struct request {
unsigned short ioprio;
void *special; /* opaque pointer available for LLD use */
unsigned int timeout;
int errors;
void *special; /* opaque pointer available for LLD use */
unsigned int extra_len; /* length of alignment and padding */
unsigned long deadline;
struct list_head timeout_list;
unsigned int timeout;
int retries;
/*
* completion callback.
@@ -337,7 +341,6 @@ struct queue_limits {
unsigned char misaligned;
unsigned char discard_misaligned;
unsigned char cluster;
unsigned char discard_zeroes_data;
unsigned char raid_partial_stripes_expensive;
enum blk_zoned_model zoned;
};
@@ -388,6 +391,7 @@ struct request_queue {
int nr_rqs[2]; /* # allocated [a]sync rqs */
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
struct blk_queue_stats *stats;
struct rq_wb *rq_wb;
/*
@@ -505,8 +509,6 @@ struct request_queue {
unsigned int nr_sorted;
unsigned int in_flight[2];
struct blk_rq_stat rq_stats[2];
/*
* Number of active block driver functions for which blk_drain_queue()
* must wait. Must be incremented around functions that unlock the
@@ -516,6 +518,10 @@ struct request_queue {
unsigned int rq_timeout;
int poll_nsec;
struct blk_stat_callback *poll_cb;
struct blk_rq_stat poll_stat[BLK_MQ_POLL_STATS_BKTS];
struct timer_list timeout;
struct work_struct timeout_work;
struct list_head timeout_list;
@@ -610,6 +616,8 @@ struct request_queue {
#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */
#define QUEUE_FLAG_DAX 26 /* device supports DAX */
#define QUEUE_FLAG_STATS 27 /* track rq completion times */
#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */
#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -918,6 +926,7 @@ extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
extern blk_qc_t generic_make_request(struct bio *bio);
extern void blk_rq_init(struct request_queue *q, struct request *rq);
extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
extern void blk_put_request(struct request *);
extern void __blk_put_request(struct request_queue *, struct request *);
extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
@@ -963,7 +972,7 @@ extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, uns
extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
struct rq_map_data *, const struct iov_iter *,
gfp_t);
extern int blk_execute_rq(struct request_queue *, struct gendisk *,
extern void blk_execute_rq(struct request_queue *, struct gendisk *,
struct request *, int);
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
struct request *, int, rq_end_io_fn *);
@@ -1080,20 +1089,6 @@ static inline unsigned int blk_rq_count_bios(struct request *rq)
return nr_bios;
}
/*
* blk_rq_set_prio - associate a request with prio from ioc
* @rq: request of interest
* @ioc: target iocontext
*
* Assocate request prio with ioc prio so request based drivers
* can leverage priority information.
*/
static inline void blk_rq_set_prio(struct request *rq, struct io_context *ioc)
{
if (ioc)
rq->ioprio = ioc->ioprio;
}
/*
* Request issue related functions.
*/
@@ -1120,13 +1115,10 @@ extern void blk_finish_request(struct request *rq, int error);
extern bool blk_end_request(struct request *rq, int error,
unsigned int nr_bytes);
extern void blk_end_request_all(struct request *rq, int error);
extern bool blk_end_request_cur(struct request *rq, int error);
extern bool blk_end_request_err(struct request *rq, int error);
extern bool __blk_end_request(struct request *rq, int error,
unsigned int nr_bytes);
extern void __blk_end_request_all(struct request *rq, int error);
extern bool __blk_end_request_cur(struct request *rq, int error);
extern bool __blk_end_request_err(struct request *rq, int error);
extern void blk_complete_request(struct request *);
extern void __blk_complete_request(struct request *);
@@ -1329,23 +1321,27 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
return bqt->tag_index[tag];
}
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
#define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */
#define BLKDEV_DISCARD_ZERO (1 << 1) /* must reliably zero data */
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, int flags,
struct bio **biop);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */
extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
bool discard);
unsigned flags);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, bool discard);
sector_t nr_sects, gfp_t gfp_mask, unsigned flags);
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
{
@@ -1359,7 +1355,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
return blkdev_issue_zeroout(sb->s_bdev,
block << (sb->s_blocksize_bits - 9),
nr_blocks << (sb->s_blocksize_bits - 9),
gfp_mask, true);
gfp_mask, 0);
}
extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
@@ -1529,19 +1525,6 @@ static inline int bdev_discard_alignment(struct block_device *bdev)
return q->limits.discard_alignment;
}
static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
{
if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1)
return 1;
return 0;
}
static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
{
return queue_discard_zeroes_data(bdev_get_queue(bdev));
}
static inline unsigned int bdev_write_same(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
@@ -1726,6 +1709,7 @@ int kblockd_schedule_work(struct work_struct *work);
int kblockd_schedule_work_on(int cpu, struct work_struct *work);
int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
#ifdef CONFIG_BLK_CGROUP
/*