Merge branch 'blk-mq/core' into for-3.13/core

Signed-off-by: Jens Axboe <axboe@kernel.dk>

Conflicts:
	block/blk-timeout.c
This commit is contained in:
Jens Axboe
2013-11-08 09:08:12 -07:00
28 changed files with 3720 additions and 189 deletions

View File

@@ -420,6 +420,8 @@ static inline void bio_list_init(struct bio_list *bl)
bl->head = bl->tail = NULL;
}
#define BIO_EMPTY_LIST { NULL, NULL }
/*
 * Walk the bios linked on @bl, starting at (bl)->head and following
 * bi_next until a NULL link is reached.  @bio is the cursor: it is
 * assigned on every step and is NULL once the walk has run to completion.
 *
 * Both arguments are fully parenthesized so that a cursor which is not a
 * plain identifier (for example "*slot") expands correctly.
 */
#define bio_list_for_each(bio, bl) \
	for ((bio) = (bl)->head; (bio); (bio) = (bio)->bi_next)

183
include/linux/blk-mq.h Normal file
View File

@@ -0,0 +1,183 @@
#ifndef BLK_MQ_H
#define BLK_MQ_H
#include <linux/blkdev.h>
struct blk_mq_tags;
/*
 * A CPU notification hook: list linkage, a callback, and an opaque data
 * pointer.
 * NOTE(review): the code that registers these and invokes notify() is not
 * part of this header; presumably @data is what gets passed back as the
 * callback's first argument — confirm against the notifier implementation.
 */
struct blk_mq_cpu_notifier {
struct list_head list;
void *data;
void (*notify)(void *data, unsigned long action, unsigned int cpu);
};
/*
 * Per-hardware-queue context.
 * NOTE(review): the notes below are read off field names and types only;
 * the code that populates and uses these fields is not in this header, so
 * treat anything marked "presumably" as something to confirm there.
 */
struct blk_mq_hw_ctx {
/*
 * Lock and dispatch list are grouped in one anonymous struct carrying the
 * ____cacheline_aligned_in_smp attribute.  Presumably the lock guards the
 * dispatch list — confirm.
 */
struct {
spinlock_t lock;
struct list_head dispatch;
} ____cacheline_aligned_in_smp;
unsigned long state; /* BLK_MQ_S_* flags */
struct delayed_work delayed_work;
unsigned long flags; /* BLK_MQ_F_* flags */
struct request_queue *queue;
unsigned int queue_num;
void *driver_data;
/* ctxs holds nr_ctx entries (this is the bound used by hctx_for_each_ctx) */
unsigned int nr_ctx;
struct blk_mq_ctx **ctxs;
/* presumably a bitmap over the contexts, sized by nr_ctx_map — confirm */
unsigned int nr_ctx_map;
unsigned long *ctx_map;
/* request table indexed by tag (see blk_mq_tag_to_rq()) */
struct request **rqs;
struct list_head page_list;
struct blk_mq_tags *tags;
/* counters; presumably statistics exported for debugging — confirm */
unsigned long queued;
unsigned long run;
#define BLK_MQ_MAX_DISPATCH_ORDER 10
unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
unsigned int queue_depth;
unsigned int numa_node;
unsigned int cmd_size; /* per-request extra data */
struct blk_mq_cpu_notifier cpu_notifier;
struct kobject kobj;
};
/*
 * Registration parameters supplied by a driver.  A pointer to this
 * structure is the first argument of blk_mq_init_queue() and of the
 * alloc_hctx callback.
 * NOTE(review): units of @timeout and the valid range of @queue_depth
 * (BLK_MQ_MAX_DEPTH looks related) are not stated here — confirm.
 */
struct blk_mq_reg {
struct blk_mq_ops *ops;
unsigned int nr_hw_queues;
unsigned int queue_depth;
unsigned int reserved_tags;
unsigned int cmd_size; /* per-request extra data */
int numa_node;
unsigned int timeout;
unsigned int flags; /* BLK_MQ_F_* */
};
/*
 * Function types for the driver callbacks collected in struct blk_mq_ops.
 *
 * queue_rq_fn   - type of blk_mq_ops.queue_rq; presumably returns one of
 *                 the BLK_MQ_RQ_QUEUE_* codes — confirm against callers.
 * map_queue_fn  - type of blk_mq_ops.map_queue; blk_mq_map_queue() has the
 *                 same signature and names the integer argument ctx_index.
 * alloc_hctx_fn - type of blk_mq_ops.alloc_hctx; same signature as
 *                 blk_mq_alloc_single_hw_queue().
 * free_hctx_fn  - type of blk_mq_ops.free_hctx; same signature as
 *                 blk_mq_free_single_hw_queue().
 * init_hctx_fn  - type of blk_mq_ops.init_hctx.
 * exit_hctx_fn  - type of blk_mq_ops.exit_hctx.
 *
 * NOTE(review): the unnamed unsigned int arguments are presumably the
 * hardware queue index — confirm.
 */
typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int);
typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
/*
 * Table of driver-provided callbacks; struct blk_mq_reg carries a pointer
 * to one of these in its ops member.
 * NOTE(review): which callbacks are mandatory and which may be left NULL
 * is not stated in this header — confirm against the core implementation.
 */
struct blk_mq_ops {
/*
 * Queue request
 */
queue_rq_fn *queue_rq;
/*
 * Map to specific hardware queue
 */
map_queue_fn *map_queue;
/*
 * Called on request timeout
 */
rq_timed_out_fn *timeout;
/*
 * Override for hctx allocations (should probably go)
 */
alloc_hctx_fn *alloc_hctx;
free_hctx_fn *free_hctx;
/*
 * Called when the block layer side of a hardware queue has been
 * set up, allowing the driver to allocate/init matching structures.
 * Ditto for exit/teardown.
 */
init_hctx_fn *init_hctx;
exit_hctx_fn *exit_hctx;
};
/*
 * blk-mq constants.  One anonymous enum holds several unrelated families,
 * so values repeat across families (e.g. BLK_MQ_F_SHOULD_MERGE and
 * BLK_MQ_S_STOPPED are both 1 << 0); never compare across families.
 */
enum {
/* BLK_MQ_RQ_QUEUE_*: presumably the return codes of queue_rq — confirm */
BLK_MQ_RQ_QUEUE_OK = 0, /* queued fine */
BLK_MQ_RQ_QUEUE_BUSY = 1, /* requeue IO for later */
BLK_MQ_RQ_QUEUE_ERROR = 2, /* end IO with error */
/* BLK_MQ_F_*: bits for blk_mq_reg.flags and blk_mq_hw_ctx.flags */
BLK_MQ_F_SHOULD_MERGE = 1 << 0,
BLK_MQ_F_SHOULD_SORT = 1 << 1,
BLK_MQ_F_SHOULD_IPI = 1 << 2,
/* BLK_MQ_S_*: bits for blk_mq_hw_ctx.state */
BLK_MQ_S_STOPPED = 1 << 0,
/* presumably the upper limit for queue_depth — confirm */
BLK_MQ_MAX_DEPTH = 2048,
};
/*
 * Entry points exported by the blk-mq core.
 * NOTE(review): only the declarations are visible here; the behavior of
 * each function must be checked against its definition.  What the
 * signatures themselves show:
 *  - blk_mq_map_queue() matches map_queue_fn, and
 *    blk_mq_alloc_single_hw_queue() / blk_mq_free_single_hw_queue() match
 *    alloc_hctx_fn / free_hctx_fn, so they can be plugged straight into
 *    struct blk_mq_ops.
 *  - blk_mq_init_commands() takes a callback plus an opaque @data pointer
 *    that appears as the callback's first parameter.
 */
struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *);
void blk_mq_free_queue(struct request_queue *);
int blk_mq_register_disk(struct gendisk *);
void blk_mq_unregister_disk(struct gendisk *);
void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data);
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
void blk_mq_insert_request(struct request_queue *, struct request *, bool);
void blk_mq_run_queues(struct request_queue *q, bool async);
void blk_mq_free_request(struct request *rq);
bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved);
struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp);
struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag);
struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int);
void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
void blk_mq_end_io(struct request *rq, int error);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_stop_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queues(struct request_queue *q);
/*
* Driver command data is immediately after the request. So subtract request
* size to get back to the original request.
*/
static inline struct request *blk_mq_rq_from_pdu(void *pdu)
{
return pdu - sizeof(struct request);
}
static inline void *blk_mq_rq_to_pdu(struct request *rq)
{
return (void *) rq + sizeof(*rq);
}
/*
 * Fetch the request stored at index @tag of @hctx's rqs table.
 * @tag is used as-is; no range check is done here.
 */
static inline struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx,
					       unsigned int tag)
{
	struct request **table = hctx->rqs;

	return table[tag];
}
/*
 * Iterate over the hardware contexts of @q.
 *
 * @q:    queue providing queue_hw_ctx[] and nr_hw_queues
 * @hctx: cursor, set to (q)->queue_hw_ctx[i] for each pass of the body
 * @i:    index variable, runs from 0 to nr_hw_queues - 1
 *
 * The cursor is loaded only after the bound check has passed, so the table
 * is never read at index nr_hw_queues, and it is not read at all when
 * nr_hw_queues is 0.  After a complete walk @hctx keeps the last element
 * visited; with zero queues it is left untouched.
 */
#define queue_for_each_hw_ctx(q, hctx, i)				\
	for ((i) = 0;							\
	     (i) < (q)->nr_hw_queues &&					\
	     ((hctx) = (q)->queue_hw_ctx[(i)], 1);			\
	     (i)++)
/*
 * Iterate over the software contexts of @q.
 *
 * @q:   queue providing queue_ctx and nr_queues
 * @ctx: cursor, set to per_cpu_ptr((q)->queue_ctx, i) for each pass
 * @i:   index variable, runs from 0 to nr_queues - 1
 *
 * per_cpu_ptr() is evaluated only after the bound check has passed, so it
 * is never asked for index nr_queues.  @q is parenthesized everywhere, so
 * an expression such as "&queue" may be passed.  After a complete walk
 * @ctx keeps the last context visited; with zero queues it is untouched.
 */
#define queue_for_each_ctx(q, ctx, i)					\
	for ((i) = 0;							\
	     (i) < (q)->nr_queues &&					\
	     ((ctx) = per_cpu_ptr((q)->queue_ctx, (i)), 1);		\
	     (i)++)
/*
 * Iterate over the software contexts attached to a hardware context.
 *
 * @hctx: hardware context providing ctxs[] and nr_ctx
 * @ctx:  cursor, set to (hctx)->ctxs[i] for each pass of the body
 * @i:    index variable, runs from 0 to nr_ctx - 1
 *
 * The cursor is loaded only after the bound check has passed, so ctxs[] is
 * never read at index nr_ctx, and it is not read at all when nr_ctx is 0.
 * After a complete walk @ctx keeps the last element visited; with zero
 * contexts it is left untouched.
 */
#define hctx_for_each_ctx(hctx, ctx, i)					\
	for ((i) = 0;							\
	     (i) < (hctx)->nr_ctx &&					\
	     ((ctx) = (hctx)->ctxs[(i)], 1);				\
	     (i)++)
/*
 * Accumulate the expression @sum over every software context of @q and
 * yield the total as an unsigned int (the macro is a statement expression).
 *
 * @sum is expanded inside the queue_for_each_ctx() loop body, once per
 * context, where the cursor is the local variable __x (a struct
 * blk_mq_ctx pointer).  The expression passed in may therefore refer to
 * __x, which makes that name part of the macro's contract.
 */
#define blk_ctx_sum(q, sum) \
({ \
struct blk_mq_ctx *__x; \
unsigned int __ret = 0, __i; \
\
queue_for_each_ctx((q), __x, __i) \
__ret += sum; \
__ret; \
})
#endif

View File

@@ -178,19 +178,20 @@ enum rq_flag_bits {
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
__REQ_KERNEL, /* direct IO to kernel pages */
__REQ_PM, /* runtime pm request */
__REQ_END, /* last of chain of requests */
__REQ_NR_BITS, /* stops here */
};
#define REQ_WRITE (1 << __REQ_WRITE)
#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV)
#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT)
#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER)
#define REQ_SYNC (1 << __REQ_SYNC)
#define REQ_META (1 << __REQ_META)
#define REQ_PRIO (1 << __REQ_PRIO)
#define REQ_DISCARD (1 << __REQ_DISCARD)
#define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME)
#define REQ_NOIDLE (1 << __REQ_NOIDLE)
#define REQ_WRITE (1ULL << __REQ_WRITE)
#define REQ_FAILFAST_DEV (1ULL << __REQ_FAILFAST_DEV)
#define REQ_FAILFAST_TRANSPORT (1ULL << __REQ_FAILFAST_TRANSPORT)
#define REQ_FAILFAST_DRIVER (1ULL << __REQ_FAILFAST_DRIVER)
#define REQ_SYNC (1ULL << __REQ_SYNC)
#define REQ_META (1ULL << __REQ_META)
#define REQ_PRIO (1ULL << __REQ_PRIO)
#define REQ_DISCARD (1ULL << __REQ_DISCARD)
#define REQ_WRITE_SAME (1ULL << __REQ_WRITE_SAME)
#define REQ_NOIDLE (1ULL << __REQ_NOIDLE)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
@@ -206,28 +207,29 @@ enum rq_flag_bits {
#define REQ_NOMERGE_FLAGS \
(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
#define REQ_RAHEAD (1 << __REQ_RAHEAD)
#define REQ_THROTTLED (1 << __REQ_THROTTLED)
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
#define REQ_THROTTLED (1ULL << __REQ_THROTTLED)
#define REQ_SORTED (1 << __REQ_SORTED)
#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
#define REQ_FUA (1 << __REQ_FUA)
#define REQ_NOMERGE (1 << __REQ_NOMERGE)
#define REQ_STARTED (1 << __REQ_STARTED)
#define REQ_DONTPREP (1 << __REQ_DONTPREP)
#define REQ_QUEUED (1 << __REQ_QUEUED)
#define REQ_ELVPRIV (1 << __REQ_ELVPRIV)
#define REQ_FAILED (1 << __REQ_FAILED)
#define REQ_QUIET (1 << __REQ_QUIET)
#define REQ_PREEMPT (1 << __REQ_PREEMPT)
#define REQ_ALLOCED (1 << __REQ_ALLOCED)
#define REQ_COPY_USER (1 << __REQ_COPY_USER)
#define REQ_FLUSH (1 << __REQ_FLUSH)
#define REQ_FLUSH_SEQ (1 << __REQ_FLUSH_SEQ)
#define REQ_IO_STAT (1 << __REQ_IO_STAT)
#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
#define REQ_SECURE (1 << __REQ_SECURE)
#define REQ_KERNEL (1 << __REQ_KERNEL)
#define REQ_PM (1 << __REQ_PM)
#define REQ_SORTED (1ULL << __REQ_SORTED)
#define REQ_SOFTBARRIER (1ULL << __REQ_SOFTBARRIER)
#define REQ_FUA (1ULL << __REQ_FUA)
#define REQ_NOMERGE (1ULL << __REQ_NOMERGE)
#define REQ_STARTED (1ULL << __REQ_STARTED)
#define REQ_DONTPREP (1ULL << __REQ_DONTPREP)
#define REQ_QUEUED (1ULL << __REQ_QUEUED)
#define REQ_ELVPRIV (1ULL << __REQ_ELVPRIV)
#define REQ_FAILED (1ULL << __REQ_FAILED)
#define REQ_QUIET (1ULL << __REQ_QUIET)
#define REQ_PREEMPT (1ULL << __REQ_PREEMPT)
#define REQ_ALLOCED (1ULL << __REQ_ALLOCED)
#define REQ_COPY_USER (1ULL << __REQ_COPY_USER)
#define REQ_FLUSH (1ULL << __REQ_FLUSH)
#define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ)
#define REQ_IO_STAT (1ULL << __REQ_IO_STAT)
#define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE)
#define REQ_SECURE (1ULL << __REQ_SECURE)
#define REQ_KERNEL (1ULL << __REQ_KERNEL)
#define REQ_PM (1ULL << __REQ_PM)
#define REQ_END (1ULL << __REQ_END)
#endif /* __LINUX_BLK_TYPES_H */

View File

@@ -8,6 +8,7 @@
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/pagemap.h>
@@ -94,12 +95,19 @@ enum rq_cmd_type_bits {
* as well!
*/
struct request {
struct list_head queuelist;
struct call_single_data csd;
union {
struct list_head queuelist;
struct llist_node ll_list;
};
union {
struct call_single_data csd;
struct work_struct mq_flush_data;
};
struct request_queue *q;
struct blk_mq_ctx *mq_ctx;
unsigned int cmd_flags;
u64 cmd_flags;
enum rq_cmd_type_bits cmd_type;
unsigned long atomic_flags;
@@ -160,8 +168,6 @@ struct request {
unsigned short ioprio;
int ref_count;
void *special; /* opaque pointer available for LLD use */
char *buffer; /* kaddr of the current segment if available */
@@ -215,6 +221,8 @@ struct request_pm_state
#include <linux/elevator.h>
struct blk_queue_ctx;
typedef void (request_fn_proc) (struct request_queue *q);
typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
typedef int (prep_rq_fn) (struct request_queue *, struct request *);
@@ -313,6 +321,18 @@ struct request_queue {
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
struct blk_mq_ops *mq_ops;
unsigned int *mq_map;
/* sw queues */
struct blk_mq_ctx *queue_ctx;
unsigned int nr_queues;
/* hw dispatch queues */
struct blk_mq_hw_ctx **queue_hw_ctx;
unsigned int nr_hw_queues;
/*
* Dispatch queue sorting
*/
@@ -361,6 +381,11 @@ struct request_queue {
*/
struct kobject kobj;
/*
* mq queue kobject
*/
struct kobject mq_kobj;
#ifdef CONFIG_PM_RUNTIME
struct device *dev;
int rpm_status;
@@ -425,7 +450,13 @@ struct request_queue {
unsigned long flush_pending_since;
struct list_head flush_queue[2];
struct list_head flush_data_in_flight;
struct request flush_rq;
union {
struct request flush_rq;
struct {
spinlock_t mq_flush_lock;
struct work_struct mq_flush_work;
};
};
struct mutex sysfs_lock;
@@ -437,14 +468,14 @@ struct request_queue {
struct bsg_class_device bsg_dev;
#endif
#ifdef CONFIG_BLK_CGROUP
struct list_head all_q_node;
#endif
#ifdef CONFIG_BLK_DEV_THROTTLING
/* Throttle data */
struct throtl_data *td;
#endif
struct rcu_head rcu_head;
wait_queue_head_t mq_freeze_wq;
struct percpu_counter mq_usage_counter;
struct list_head all_q_node;
};
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
@@ -467,6 +498,7 @@ struct request_queue {
#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */
#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */
#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */
#define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -539,6 +571,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
#define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
@@ -570,7 +603,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
#define rq_data_dir(rq) ((rq)->cmd_flags & 1)
#define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0)
static inline unsigned int blk_queue_cluster(struct request_queue *q)
{
@@ -1013,6 +1046,7 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
struct blk_plug {
unsigned long magic; /* detect uninitialized use-cases */
struct list_head list; /* requests */
struct list_head mq_list; /* blk-mq requests */
struct list_head cb_list; /* md requires an unplug callback */
};
#define BLK_MAX_REQUEST_COUNT 16
@@ -1050,7 +1084,10 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
struct blk_plug *plug = tsk->plug;
return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list));
return plug &&
(!list_empty(&plug->list) ||
!list_empty(&plug->mq_list) ||
!list_empty(&plug->cb_list));
}
/*
@@ -1325,6 +1362,7 @@ static inline void put_dev_sector(Sector p)
struct work_struct;
int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay);
#ifdef CONFIG_BLK_CGROUP
/*

View File

@@ -16,6 +16,8 @@ struct percpu_ida {
* percpu_ida_init()
*/
unsigned nr_tags;
unsigned percpu_max_size;
unsigned percpu_batch_size;
struct percpu_ida_cpu __percpu *tag_cpu;
@@ -51,10 +53,29 @@ struct percpu_ida {
} ____cacheline_aligned_in_smp;
};
/*
* Number of tags we move between the percpu freelist and the global freelist at
* a time
*/
#define IDA_DEFAULT_PCPU_BATCH_MOVE 32U
/* Max size of percpu freelist */
#define IDA_DEFAULT_PCPU_SIZE ((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2)
int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp);
void percpu_ida_free(struct percpu_ida *pool, unsigned tag);
void percpu_ida_destroy(struct percpu_ida *pool);
int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags);
int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
unsigned long max_size, unsigned long batch_size);
/*
 * Convenience wrapper around __percpu_ida_init(): forwards @pool and
 * @nr_tags unchanged, supplies IDA_DEFAULT_PCPU_SIZE as max_size and
 * IDA_DEFAULT_PCPU_BATCH_MOVE as batch_size, and returns its result.
 */
static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
{
	const unsigned long max_size = IDA_DEFAULT_PCPU_SIZE;
	const unsigned long batch_size = IDA_DEFAULT_PCPU_BATCH_MOVE;

	return __percpu_ida_init(pool, nr_tags, max_size, batch_size);
}
typedef int (*percpu_ida_cb)(unsigned, void *);
int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
void *data);
unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu);
#endif /* __PERCPU_IDA_H__ */