Merge branch 'irq/for-block' into irq/core

Add the new irq spreading infrastructure.
This commit is contained in:
Thomas Gleixner
2016-09-15 20:54:40 +02:00
9 changed files with 295 additions and 125 deletions

View File

@@ -4,60 +4,151 @@
#include <linux/slab.h>
#include <linux/cpu.h>
static int get_first_sibling(unsigned int cpu)
static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
int cpus_per_vec)
{
unsigned int ret;
const struct cpumask *siblmsk;
int cpu, sibl;
ret = cpumask_first(topology_sibling_cpumask(cpu));
if (ret < nr_cpu_ids)
return ret;
return cpu;
for ( ; cpus_per_vec > 0; ) {
cpu = cpumask_first(nmsk);
/* Should not happen, but I'm too lazy to think about it */
if (cpu >= nr_cpu_ids)
return;
cpumask_clear_cpu(cpu, nmsk);
cpumask_set_cpu(cpu, irqmsk);
cpus_per_vec--;
/* If the cpu has siblings, use them first */
siblmsk = topology_sibling_cpumask(cpu);
for (sibl = -1; cpus_per_vec > 0; ) {
sibl = cpumask_next(sibl, siblmsk);
if (sibl >= nr_cpu_ids)
break;
if (!cpumask_test_and_clear_cpu(sibl, nmsk))
continue;
cpumask_set_cpu(sibl, irqmsk);
cpus_per_vec--;
}
}
}
/*
* Take a map of online CPUs and the number of available interrupt vectors
* and generate an output cpumask suitable for spreading MSI/MSI-X vectors
* so that they are distributed as good as possible around the CPUs. If
* more vectors than CPUs are available we'll map one to each CPU,
* otherwise we map one to the first sibling of each socket.
*
* If there are more vectors than CPUs we will still only have one bit
* set per CPU, but interrupt code will keep on assigning the vectors from
* the start of the bitmap until we run out of vectors.
*/
struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
{
struct cpumask *affinity_mask;
unsigned int max_vecs = *nr_vecs;
int n, nodes;
if (max_vecs == 1)
return NULL;
affinity_mask = kzalloc(cpumask_size(), GFP_KERNEL);
if (!affinity_mask) {
*nr_vecs = 1;
return NULL;
/* Calculate the number of nodes in the supplied affinity mask */
for (n = 0, nodes = 0; n < num_online_nodes(); n++) {
if (cpumask_intersects(mask, cpumask_of_node(n))) {
node_set(n, *nodemsk);
nodes++;
}
}
return nodes;
}
/**
* irq_create_affinity_masks - Create affinity masks for multiqueue spreading
* @affinity: The affinity mask to spread. If NULL cpu_online_mask
* is used
* @nvecs: The number of vectors
*
* Returns the masks pointer or NULL if allocation failed.
*/
struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
int nvec)
{
int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0;
nodemask_t nodemsk = NODE_MASK_NONE;
struct cpumask *masks;
cpumask_var_t nmsk;
if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
return NULL;
masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL);
if (!masks)
goto out;
/* Stabilize the cpumasks */
get_online_cpus();
if (max_vecs >= num_online_cpus()) {
cpumask_copy(affinity_mask, cpu_online_mask);
*nr_vecs = num_online_cpus();
} else {
unsigned int vecs = 0, cpu;
/* If the supplied affinity mask is NULL, use cpu online mask */
if (!affinity)
affinity = cpu_online_mask;
for_each_online_cpu(cpu) {
if (cpu == get_first_sibling(cpu)) {
cpumask_set_cpu(cpu, affinity_mask);
vecs++;
}
nodes = get_nodes_in_cpumask(affinity, &nodemsk);
if (--max_vecs == 0)
/*
* If the number of nodes in the mask is less than or equal the
* number of vectors we just spread the vectors across the nodes.
*/
if (nvec <= nodes) {
for_each_node_mask(n, nodemsk) {
cpumask_copy(masks + curvec, cpumask_of_node(n));
if (++curvec == nvec)
break;
}
*nr_vecs = vecs;
goto outonl;
}
put_online_cpus();
return affinity_mask;
/* Spread the vectors per node */
vecs_per_node = nvec / nodes;
/* Account for rounding errors */
extra_vecs = nvec - (nodes * vecs_per_node);
for_each_node_mask(n, nodemsk) {
int ncpus, v, vecs_to_assign = vecs_per_node;
/* Get the cpus on this node which are in the mask */
cpumask_and(nmsk, affinity, cpumask_of_node(n));
/* Calculate the number of cpus per vector */
ncpus = cpumask_weight(nmsk);
for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) {
cpus_per_vec = ncpus / vecs_to_assign;
/* Account for extra vectors to compensate rounding errors */
if (extra_vecs) {
cpus_per_vec++;
if (!--extra_vecs)
vecs_per_node++;
}
irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
}
if (curvec >= nvec)
break;
}
outonl:
put_online_cpus();
out:
free_cpumask_var(nmsk);
return masks;
}
/**
* irq_calc_affinity_vectors - Calculate to optimal number of vectors for a given affinity mask
* @affinity: The affinity mask to spread. If NULL cpu_online_mask
* is used
* @maxvec: The maximum number of vectors available
*/
int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
{
int cpus, ret;
/* Stabilize the cpumasks */
get_online_cpus();
/* If the supplied affinity mask is NULL, use cpu online mask */
if (!affinity)
affinity = cpu_online_mask;
cpus = cpumask_weight(affinity);
ret = (cpus < maxvec) ? cpus : maxvec;
put_online_cpus();
return ret;
}

View File

@@ -424,25 +424,24 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
const struct cpumask *mask = NULL;
struct irq_desc *desc;
unsigned int flags;
int i, cpu = -1;
int i;
if (affinity && cpumask_empty(affinity))
return -EINVAL;
/* Validate affinity mask(s) */
if (affinity) {
for (i = 0, mask = affinity; i < cnt; i++, mask++) {
if (cpumask_empty(mask))
return -EINVAL;
}
}
flags = affinity ? IRQD_AFFINITY_MANAGED : 0;
mask = NULL;
for (i = 0; i < cnt; i++) {
if (affinity) {
cpu = cpumask_next(cpu, affinity);
if (cpu >= nr_cpu_ids)
cpu = cpumask_first(affinity);
node = cpu_to_node(cpu);
/*
* For single allocations we use the caller provided
* mask otherwise we use the mask of the target cpu
*/
mask = cnt == 1 ? affinity : cpumask_of(cpu);
node = cpu_to_node(cpumask_first(affinity));
mask = affinity;
affinity++;
}
desc = alloc_desc(start + i, node, flags, mask, owner);
if (!desc)
@@ -670,9 +669,9 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
* @cnt: Number of consecutive irqs to allocate.
* @node: Preferred node on which the irq descriptor should be allocated
* @owner: Owning module (can be NULL)
* @affinity: Optional pointer to an affinity mask which hints where the
* irq descriptors should be allocated and which default
* affinities to use
* @affinity: Optional pointer to an affinity mask array of size @cnt which
* hints where the irq descriptors should be allocated and which
* default affinities to use
*
* Returns the first irq number or error code
*/

View File

@@ -18,20 +18,42 @@
/* Temparory solution for building, will be removed later */
#include <linux/pci.h>
struct msi_desc *alloc_msi_entry(struct device *dev)
/**
* alloc_msi_entry - Allocate an initialize msi_entry
* @dev: Pointer to the device for which this is allocated
* @nvec: The number of vectors used in this entry
* @affinity: Optional pointer to an affinity mask array size of @nvec
*
* If @affinity is not NULL then a an affinity array[@nvec] is allocated
* and the affinity masks from @affinity are copied.
*/
struct msi_desc *
alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity)
{
struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
struct msi_desc *desc;
desc = kzalloc(sizeof(*desc), GFP_KERNEL);
if (!desc)
return NULL;
INIT_LIST_HEAD(&desc->list);
desc->dev = dev;
desc->nvec_used = nvec;
if (affinity) {
desc->affinity = kmemdup(affinity,
nvec * sizeof(*desc->affinity), GFP_KERNEL);
if (!desc->affinity) {
kfree(desc);
return NULL;
}
}
return desc;
}
void free_msi_entry(struct msi_desc *entry)
{
kfree(entry->affinity);
kfree(entry);
}