sparse irq_desc[] array: core kernel and x86 changes
Impact: new feature

Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with
NR_CPUS set to large values. The goal is to be able to scale up to a much
larger NR_IRQS value without impacting the (important) common case.

To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of
irq_desc pointers.

When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node to allocate each
irq_desc, which also makes the IRQ descriptors NUMA-local (to the site
that calls request_irq()).

This also gets rid of the static irq_cfg[] array on x86: irq_cfg is now
stored in desc->chip_data.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
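The core idea, in miniature: instead of a statically sized table of descriptors, keep a table of pointers and allocate a descriptor the first time an IRQ is looked up. The sketch below is an illustrative userspace model of that pattern, not the kernel code itself: it uses plain calloc and a pthread mutex in place of kzalloc_node() and sparse_irq_lock, and the names my_irq_desc, my_irq_to_desc_alloc and MY_NR_IRQS are invented for the example.

/* Illustrative userspace sketch of the sparse-descriptor pattern this
 * commit introduces: a pointer table plus lazy, lock-protected allocation.
 * Names prefixed with my_ are hypothetical, not kernel APIs. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define MY_NR_IRQS 1024              /* table of pointers, not descriptors */

struct my_irq_desc {
	int irq;
	unsigned int status;
	unsigned int *kstat_irqs;    /* per-CPU counters, allocated with desc */
};

static struct my_irq_desc *my_irq_desc_ptrs[MY_NR_IRQS];
static pthread_mutex_t my_sparse_lock = PTHREAD_MUTEX_INITIALIZER;

/* Look up a descriptor, allocating it on first use
 * (compare irq_to_desc_alloc_cpu() in the diff below). */
static struct my_irq_desc *my_irq_to_desc_alloc(unsigned int irq, int nr_cpus)
{
	struct my_irq_desc *desc;

	if (irq >= MY_NR_IRQS)
		return NULL;

	desc = my_irq_desc_ptrs[irq];
	if (desc)
		return desc;                 /* fast path: already allocated */

	pthread_mutex_lock(&my_sparse_lock);
	desc = my_irq_desc_ptrs[irq];        /* re-check under the lock */
	if (!desc) {
		desc = calloc(1, sizeof(*desc));
		if (desc) {
			desc->irq = (int)irq;
			desc->kstat_irqs = calloc(nr_cpus, sizeof(unsigned int));
			my_irq_desc_ptrs[irq] = desc;
		}
	}
	pthread_mutex_unlock(&my_sparse_lock);
	return desc;
}

int main(void)
{
	struct my_irq_desc *d = my_irq_to_desc_alloc(9, 4);

	/* Only IRQ 9's descriptor exists; the other 1023 slots stay NULL. */
	printf("desc for irq %d allocated; pointer table costs %zu bytes\n",
	       d ? d->irq : -1, sizeof(my_irq_desc_ptrs));
	return 0;
}

In the kernel patch the allocation additionally passes cpu_to_node(cpu) to kzalloc_node(), so the descriptor and its kstat_irqs counters land on the NUMA node of the CPU that first sets up the interrupt.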
@@ -40,6 +40,9 @@ unsigned long probe_irq_on(void)
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
 	for_each_irq_desc_reverse(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			/*
@@ -68,6 +71,9 @@ unsigned long probe_irq_on(void)
 	 * happened in the previous stage, it may have masked itself)
 	 */
 	for_each_irq_desc_reverse(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -86,6 +92,9 @@ unsigned long probe_irq_on(void)
 	 * Now filter out any obviously spurious interrupts
 	 */
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -124,6 +133,9 @@ unsigned int probe_irq_mask(unsigned long val)
 	int i;
 
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -166,6 +178,9 @@ int probe_irq_off(unsigned long val)
 	unsigned int status;
 
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -24,9 +24,10 @@
  */
 void dynamic_irq_init(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 	unsigned long flags;
 
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
@@ -15,9 +15,16 @@
 #include <linux/random.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/rculist.h>
+#include <linux/hash.h>
 
 #include "internals.h"
 
+/*
+ * lockdep: we want to handle all irq_desc locks as a single lock-class:
+ */
+static struct lock_class_key irq_desc_lock_class;
+
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
  * @irq: the interrupt number
@@ -49,6 +56,155 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
+void __init __attribute__((weak)) arch_early_irq_init(void)
+{
+}
+
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_desc irq_desc_init = {
+	.irq = -1,
+	.status = IRQ_DISABLED,
+	.chip = &no_irq_chip,
+	.handle_irq = handle_bad_irq,
+	.depth = 1,
+	.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+#ifdef CONFIG_SMP
+	.affinity = CPU_MASK_ALL
+#endif
+};
+
+static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
+{
+	unsigned long bytes;
+	char *ptr;
+	int node;
+
+	/* Compute how many bytes we need per irq and allocate them */
+	bytes = nr * sizeof(unsigned int);
+
+	node = cpu_to_node(cpu);
+	ptr = kzalloc_node(bytes, GFP_ATOMIC, node);
+	printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", cpu, node);
+
+	if (ptr)
+		desc->kstat_irqs = (unsigned int *)ptr;
+}
+
+void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
+{
+}
+
+static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
+{
+	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
+	desc->irq = irq;
+#ifdef CONFIG_SMP
+	desc->cpu = cpu;
+#endif
+	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+	init_kstat_irqs(desc, cpu, nr_cpu_ids);
+	if (!desc->kstat_irqs) {
+		printk(KERN_ERR "can not alloc kstat_irqs\n");
+		BUG_ON(1);
+	}
+	arch_init_chip_data(desc, cpu);
+}
+
+/*
+ * Protect the sparse_irqs:
+ */
+static DEFINE_SPINLOCK(sparse_irq_lock);
+
+struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly;
+
+static struct irq_desc irq_desc_legacy[16] __cacheline_aligned_in_smp = {
+	[0 ... 15] = {
+		.irq = -1,
+		.status = IRQ_DISABLED,
+		.chip = &no_irq_chip,
+		.handle_irq = handle_bad_irq,
+		.depth = 1,
+		.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+#ifdef CONFIG_SMP
+		.affinity = CPU_MASK_ALL
+#endif
+	}
+};
+
+/* FIXME: use bootmem alloc ...*/
+static unsigned int kstat_irqs_legacy[16][NR_CPUS];
+
+void __init early_irq_init(void)
+{
+	struct irq_desc *desc;
+	int legacy_count;
+	int i;
+
+	desc = irq_desc_legacy;
+	legacy_count = ARRAY_SIZE(irq_desc_legacy);
+
+	for (i = 0; i < legacy_count; i++) {
+		desc[i].irq = i;
+		desc[i].kstat_irqs = kstat_irqs_legacy[i];
+
+		irq_desc_ptrs[i] = desc + i;
+	}
+
+	for (i = legacy_count; i < NR_IRQS; i++)
+		irq_desc_ptrs[i] = NULL;
+
+	arch_early_irq_init();
+}
+
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL;
+}
+
+struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+{
+	struct irq_desc *desc;
+	unsigned long flags;
+	int node;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n",
+			irq, NR_IRQS);
+		WARN_ON(1);
+		return NULL;
+	}
+
+	desc = irq_desc_ptrs[irq];
+	if (desc)
+		return desc;
+
+	spin_lock_irqsave(&sparse_irq_lock, flags);
+
+	/* We have to check it to avoid races with another CPU */
+	desc = irq_desc_ptrs[irq];
+	if (desc)
+		goto out_unlock;
+
+	node = cpu_to_node(cpu);
+	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+	printk(KERN_DEBUG " alloc irq_desc for %d on cpu %d node %d\n",
+		irq, cpu, node);
+	if (!desc) {
+		printk(KERN_ERR "can not alloc irq_desc\n");
+		BUG_ON(1);
+	}
+	init_one_irq_desc(irq, desc, cpu);
+
+	irq_desc_ptrs[irq] = desc;
+
+out_unlock:
+	spin_unlock_irqrestore(&sparse_irq_lock, flags);
+
+	return desc;
+}
+
+#else
+
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
 		.status = IRQ_DISABLED,
@@ -62,6 +218,8 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	}
 };
 
+#endif
+
 /*
  * What should we do if we get a hw irq event on an illegal vector?
  * Each architecture has to answer this themself.
@@ -261,17 +419,28 @@ out:
 
 
 #ifdef CONFIG_TRACE_IRQFLAGS
-/*
- * lockdep: we want to handle all irq_desc locks as a single lock-class:
- */
-static struct lock_class_key irq_desc_lock_class;
-
 void early_init_irq_lock_class(void)
 {
+#ifndef CONFIG_SPARSE_IRQ
 	struct irq_desc *desc;
 	int i;
 
-	for_each_irq_desc(i, desc)
+	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+	}
+#endif
 }
 #endif
+
+#ifdef CONFIG_SPARSE_IRQ
+unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	return desc->kstat_irqs[cpu];
+}
+#endif
+EXPORT_SYMBOL(kstat_irqs_cpu);
 
@@ -243,7 +243,11 @@ void init_irq_proc(void)
 	/*
 	 * Create entries for all existing IRQs.
 	 */
-	for_each_irq_desc(irq, desc)
+	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+
 		register_irq_proc(irq, desc);
+	}
 }
 
@@ -91,6 +91,9 @@ static int misrouted_irq(int irq)
 	int i, ok = 0;
 
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		if (!i)
 			continue;
 
@@ -112,6 +115,8 @@ static void poll_spurious_irqs(unsigned long dummy)
 	for_each_irq_desc(i, desc) {
 		unsigned int status;
 
+		if (!desc)
+			continue;
 		if (!i)
 			continue;
 