block: add support for IO CPU affinity
This patch adds support for controlling the IO completion CPU of either all requests on a queue, or on a per-request basis. We export a sysfs variable (rq_affinity) which, if set, migrates completions of requests to the CPU that originally submitted them. A bio helper (bio_set_completion_cpu()) is also added, so that queuers can ask for completion on a specific CPU. In testing, this has been shown to cut the system time by as much as 20-40% on synthetic workloads where CPU affinity is desired. This requires a little help from the architecture, so it'll only work as designed for archs that are using the new generic smp helper infrastructure. Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
This commit is contained in:
@@ -81,6 +81,8 @@ struct bio {
|
||||
|
||||
unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
|
||||
|
||||
unsigned int bi_comp_cpu; /* completion CPU */
|
||||
|
||||
struct bio_vec *bi_io_vec; /* the actual vec list */
|
||||
|
||||
bio_end_io_t *bi_end_io;
|
||||
@@ -105,6 +107,7 @@ struct bio {
|
||||
#define BIO_BOUNCED 5 /* bio is a bounce bio */
|
||||
#define BIO_USER_MAPPED 6 /* contains user pages */
|
||||
#define BIO_EOPNOTSUPP 7 /* not supported */
|
||||
#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */
|
||||
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
|
||||
|
||||
/*
|
||||
@@ -342,6 +345,14 @@ void zero_fill_bio(struct bio *bio);
|
||||
extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
|
||||
extern unsigned int bvec_nr_vecs(unsigned short idx);
|
||||
|
||||
/*
|
||||
* Allow queuer to specify a completion CPU for this bio
*
* Records @cpu in bio->bi_comp_cpu and does nothing else: bi_flags is
* left untouched and @cpu is not range-checked by this helper.
*
* @bio: bio whose completion CPU is being requested
* @cpu: CPU number to store as the requested completion CPU
*
* NOTE(review): presumably the block layer reads bi_comp_cpu when the
* request is set up for this bio -- confirm against the submission path.
* NOTE(review): callers are presumably expected to pass a valid CPU id,
* since nothing here validates it -- confirm.
|
||||
*/
|
||||
static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
|
||||
{
|
||||
bio->bi_comp_cpu = cpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* bio_set is used to allow other portions of the IO system to
|
||||
* allocate their own private memory pools for bio and iovec structures.
|
||||
|
@@ -17,6 +17,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/stringify.h>
|
||||
#include <linux/bsg.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/scatterlist.h>
|
||||
|
||||
@@ -139,7 +140,8 @@ enum rq_flag_bits {
|
||||
*/
|
||||
struct request {
|
||||
struct list_head queuelist;
|
||||
struct list_head donelist;
|
||||
struct call_single_data csd;
|
||||
int cpu;
|
||||
|
||||
struct request_queue *q;
|
||||
|
||||
@@ -420,6 +422,7 @@ struct request_queue
|
||||
#define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */
|
||||
#define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */
|
||||
#define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */
|
||||
#define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */
|
||||
|
||||
static inline int queue_is_locked(struct request_queue *q)
|
||||
{
|
||||
|
@@ -173,15 +173,15 @@ enum {
|
||||
#define rb_entry_rq(node) rb_entry((node), struct request, rb_node)
|
||||
|
||||
/*
|
||||
* Hack to reuse the donelist list_head as the fifo time holder while
|
||||
* Hack to reuse the csd.list list_head as the fifo time holder while
|
||||
* the request is in the io scheduler. Saves an unsigned long in rq.
|
||||
*/
|
||||
#define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next)
|
||||
#define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp))
|
||||
#define rq_fifo_time(rq) ((unsigned long) (rq)->csd.list.next)
|
||||
#define rq_set_fifo_time(rq,exp) ((rq)->csd.list.next = (void *) (exp))
|
||||
#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
|
||||
#define rq_fifo_clear(rq) do { \
|
||||
list_del_init(&(rq)->queuelist); \
|
||||
INIT_LIST_HEAD(&(rq)->donelist); \
|
||||
INIT_LIST_HEAD(&(rq)->csd.list); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
|
Reference in New Issue
Block a user