Merge branch 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 APIC updates from Thomas Gleixner:
 "This update provides a major overhaul of the APIC initialization and
  vector allocation code:

   - Unification of the APIC and interrupt mode setup which was
     scattered all over the place and was hard to follow. This also
     distangles the timer setup from the APIC initialization which
     brings a clear separation of functionality.

     Great detective work from Dou Lyiang!

   - Refactoring of the x86 vector allocation mechanism. The existing
     code was based on nested loops and rather convoluted APIC callbacks
     which had a horrible worst case behaviour and tried to serve all
     different use cases in one go. This led to quite odd hacks when
     supporting the new managed interupt facility for multiqueue devices
     and made it more or less impossible to deal with the vector space
     exhaustion which was a major roadblock for server hibernation.

     Aside of that the code dealing with cpu hotplug and the system
     vectors was disconnected from the actual vector management and
     allocation code, which made it hard to follow and maintain.

     Utilizing the new bitmap matrix allocator core mechanism, the new
     allocator and management code consolidates the handling of system
     vectors, legacy vectors, cpu hotplug mechanisms and the actual
     allocation which needs to be aware of system and legacy vectors and
     hotplug constraints into a single consistent entity.

     This has one visible change: The support for multi CPU targets of
     interrupts, which is only available on a certain subset of
     CPUs/APIC variants has been removed in favour of single interrupt
     targets. A proper analysis of the multi CPU target feature revealed
     that there is no real advantage as the vast majority of interrupts
     end up on the CPU with the lowest APIC id in the set of target CPUs
     anyway. That change was agreed on by the relevant folks and allowed
     to simplify the implementation significantly and to replace rather
     fragile constructs like the vector cleanup IPI with straight
     forward and solid code.

     Furthermore this allowed to cleanly separate the allocation details
     for legacy, normal and managed interrupts:

      * Legacy interrupts are not longer wasting 16 vectors
        unconditionally

      * Managed interrupts have now a guaranteed vector reservation, but
        the actual vector assignment happens when the interrupt is
        requested. It's guaranteed not to fail.

      * Normal interrupts no longer allocate vectors unconditionally
        when the interrupt is set up (IO/APIC init or MSI(X) enable).
        The mechanism has been switched to a best effort reservation
        mode. The actual allocation happens when the interrupt is
        requested. Contrary to managed interrupts the request can fail
        due to vector space exhaustion, but drivers must handle a fail
        of request_irq() anyway. When the interrupt is freed, the vector
        is handed back as well.

        This solves a long standing problem with large unconditional
        vector allocations for a certain class of enterprise devices
        which prevented server hibernation due to vector space
        exhaustion when the unused allocated vectors had to be migrated
        to CPU0 while unplugging all non boot CPUs.

     The code has been equipped with trace points and detailed debugfs
     information to aid analysis of the vector space"

* 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (60 commits)
  x86/vector/msi: Select CONFIG_GENERIC_IRQ_RESERVATION_MODE
  PCI/MSI: Set MSI_FLAG_MUST_REACTIVATE in core code
  genirq: Add config option for reservation mode
  x86/vector: Use correct per cpu variable in free_moved_vector()
  x86/apic/vector: Ignore set_affinity call for inactive interrupts
  x86/apic: Fix spelling mistake: "symmectic" -> "symmetric"
  x86/apic: Use dead_cpu instead of current CPU when cleaning up
  ACPI/init: Invoke early ACPI initialization earlier
  x86/vector: Respect affinity mask in irq descriptor
  x86/irq: Simplify hotplug vector accounting
  x86/vector: Switch IOAPIC to global reservation mode
  x86/vector/msi: Switch to global reservation mode
  x86/vector: Handle managed interrupts proper
  x86/io_apic: Reevaluate vector configuration on activate()
  iommu/amd: Reevaluate vector configuration on activate()
  iommu/vt-d: Reevaluate vector configuration on activate()
  x86/apic/msi: Force reactivation of interrupts at startup time
  x86/vector: Untangle internal state from irq_cfg
  x86/vector: Compile SMP only code conditionally
  x86/apic: Remove unused callbacks
  ...
This commit is contained in:
Linus Torvalds
2017-11-13 18:29:23 -08:00
43 changed files with 1556 additions and 1380 deletions

View File

@@ -211,11 +211,7 @@ static inline int lapic_get_version(void)
*/
static inline int lapic_is_integrated(void)
{
#ifdef CONFIG_X86_64
return 1;
#else
return APIC_INTEGRATED(lapic_get_version());
#endif
}
/*
@@ -298,14 +294,11 @@ int get_physical_broadcast(void)
*/
int lapic_get_maxlvt(void)
{
unsigned int v;
v = apic_read(APIC_LVR);
/*
* - we always have APIC integrated on 64bit mode
* - 82489DXs do not report # of LVT entries
*/
return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
}
/*
@@ -1229,53 +1222,100 @@ void __init sync_Arb_IDs(void)
APIC_INT_LEVELTRIG | APIC_DM_INIT);
}
/*
* An initial setup of the virtual wire mode.
*/
void __init init_bsp_APIC(void)
enum apic_intr_mode_id apic_intr_mode;
static int __init apic_intr_mode_select(void)
{
unsigned int value;
/* Check kernel option */
if (disable_apic) {
pr_info("APIC disabled via kernel command line\n");
return APIC_PIC;
}
/*
* Don't do the setup now if we have a SMP BIOS as the
* through-I/O-APIC virtual wire mode might be active.
*/
if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
return;
/* Check BIOS */
#ifdef CONFIG_X86_64
/* On 64-bit, the APIC must be integrated, Check local APIC only */
if (!boot_cpu_has(X86_FEATURE_APIC)) {
disable_apic = 1;
pr_info("APIC disabled by BIOS\n");
return APIC_PIC;
}
#else
/* On 32-bit, the APIC may be integrated APIC or 82489DX */
/*
* Do not trust the local APIC being empty at bootup.
*/
clear_local_APIC();
/* Neither 82489DX nor integrated APIC ? */
if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
disable_apic = 1;
return APIC_PIC;
}
/*
* Enable APIC.
*/
value = apic_read(APIC_SPIV);
value &= ~APIC_VECTOR_MASK;
value |= APIC_SPIV_APIC_ENABLED;
#ifdef CONFIG_X86_32
/* This bit is reserved on P4/Xeon and should be cleared */
if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
(boot_cpu_data.x86 == 15))
value &= ~APIC_SPIV_FOCUS_DISABLED;
else
/* If the BIOS pretends there is an integrated APIC ? */
if (!boot_cpu_has(X86_FEATURE_APIC) &&
APIC_INTEGRATED(boot_cpu_apic_version)) {
disable_apic = 1;
pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
boot_cpu_physical_apicid);
return APIC_PIC;
}
#endif
value |= APIC_SPIV_FOCUS_DISABLED;
value |= SPURIOUS_APIC_VECTOR;
apic_write(APIC_SPIV, value);
/*
* Set up the virtual wire mode.
*/
apic_write(APIC_LVT0, APIC_DM_EXTINT);
value = APIC_DM_NMI;
if (!lapic_is_integrated()) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
if (apic_extnmi == APIC_EXTNMI_NONE)
value |= APIC_LVT_MASKED;
apic_write(APIC_LVT1, value);
/* Check MP table or ACPI MADT configuration */
if (!smp_found_config) {
disable_ioapic_support();
if (!acpi_lapic) {
pr_info("APIC: ACPI MADT or MP tables are not detected\n");
return APIC_VIRTUAL_WIRE_NO_CONFIG;
}
return APIC_VIRTUAL_WIRE;
}
#ifdef CONFIG_SMP
/* If SMP should be disabled, then really disable it! */
if (!setup_max_cpus) {
pr_info("APIC: SMP mode deactivated\n");
return APIC_SYMMETRIC_IO_NO_ROUTING;
}
if (read_apic_id() != boot_cpu_physical_apicid) {
panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
read_apic_id(), boot_cpu_physical_apicid);
/* Or can we switch back to PIC here? */
}
#endif
return APIC_SYMMETRIC_IO;
}
/* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void)
{
bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
apic_intr_mode = apic_intr_mode_select();
switch (apic_intr_mode) {
case APIC_PIC:
pr_info("APIC: Keep in PIC mode(8259)\n");
return;
case APIC_VIRTUAL_WIRE:
pr_info("APIC: Switch to virtual wire mode setup\n");
default_setup_apic_routing();
break;
case APIC_VIRTUAL_WIRE_NO_CONFIG:
pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
upmode = true;
default_setup_apic_routing();
break;
case APIC_SYMMETRIC_IO:
pr_info("APIC: Switch to symmetric I/O mode setup\n");
default_setup_apic_routing();
break;
case APIC_SYMMETRIC_IO_NO_ROUTING:
pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
break;
}
apic_bsp_setup(upmode);
}
static void lapic_setup_esr(void)
@@ -1499,7 +1539,9 @@ void setup_local_APIC(void)
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
if (!lapic_is_integrated()) /* 82489DX */
/* Is 82489DX ? */
if (!lapic_is_integrated())
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write(APIC_LVT1, value);
@@ -1885,8 +1927,8 @@ void __init init_apic_mappings(void)
* yeah -- we lie about apic_version
* in case if apic was disabled via boot option
* but it's not a problem for SMP compiled kernel
* since smp_sanity_check is prepared for such a case
* and disable smp mode
* since apic_intr_mode_select is prepared for such
* a case and disable smp mode
*/
boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
}
@@ -2242,44 +2284,6 @@ int hard_smp_processor_id(void)
return read_apic_id();
}
void default_init_apic_ldr(void)
{
unsigned long val;
apic_write(APIC_DFR, APIC_DFR_VALUE);
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
apic_write(APIC_LDR, val);
}
int default_cpu_mask_to_apicid(const struct cpumask *mask,
struct irq_data *irqdata,
unsigned int *apicid)
{
unsigned int cpu = cpumask_first(mask);
if (cpu >= nr_cpu_ids)
return -EINVAL;
*apicid = per_cpu(x86_cpu_to_apicid, cpu);
irq_data_update_effective_affinity(irqdata, cpumask_of(cpu));
return 0;
}
int flat_cpu_mask_to_apicid(const struct cpumask *mask,
struct irq_data *irqdata,
unsigned int *apicid)
{
struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata);
unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS;
if (!cpu_mask)
return -EINVAL;
*apicid = (unsigned int)cpu_mask;
cpumask_bits(effmsk)[0] = cpu_mask;
return 0;
}
/*
* Override the generic EOI implementation with an optimized version.
* Only called during early boot when only one CPU is active and with
@@ -2322,72 +2326,27 @@ static void __init apic_bsp_up_setup(void)
* Returns:
* apic_id of BSP APIC
*/
int __init apic_bsp_setup(bool upmode)
void __init apic_bsp_setup(bool upmode)
{
int id;
connect_bsp_APIC();
if (upmode)
apic_bsp_up_setup();
setup_local_APIC();
if (x2apic_mode)
id = apic_read(APIC_LDR);
else
id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
enable_IO_APIC();
end_local_APIC_setup();
irq_remap_enable_fault_handling();
setup_IO_APIC();
/* Setup local timer */
x86_init.timers.setup_percpu_clockev();
return id;
}
/*
* This initializes the IO-APIC and APIC hardware if this is
* a UP kernel.
*/
int __init APIC_init_uniprocessor(void)
{
if (disable_apic) {
pr_info("Apic disabled\n");
return -1;
}
#ifdef CONFIG_X86_64
if (!boot_cpu_has(X86_FEATURE_APIC)) {
disable_apic = 1;
pr_info("Apic disabled by BIOS\n");
return -1;
}
#else
if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
return -1;
/*
* Complain if the BIOS pretends there is one.
*/
if (!boot_cpu_has(X86_FEATURE_APIC) &&
APIC_INTEGRATED(boot_cpu_apic_version)) {
pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
boot_cpu_physical_apicid);
return -1;
}
#endif
if (!smp_found_config)
disable_ioapic_support();
default_setup_apic_routing();
apic_bsp_setup(true);
return 0;
}
#ifdef CONFIG_UP_LATE_INIT
void __init up_late_init(void)
{
APIC_init_uniprocessor();
if (apic_intr_mode == APIC_PIC)
return;
/* Setup local timer */
x86_init.timers.setup_percpu_clockev();
}
#endif