Merge branch 'x86-bsp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 BSP hotplug changes from Ingo Molnar:
 "This tree enables CPU#0 (the boot processor) to be onlined/offlined on
  x86, just like any other CPU.  Enabled on Intel CPUs for now.

  Allowing this required the identification and fixing of latent CPU#0
  assumptions (such as CPU#0 initializations, etc.) in the x86
  architecture code, plus the identification of barriers to
  BSP-offlining, such as active PIC interrupts which can only be
  serviced on the BSP.

  It's behind a default-off option, and there's a debug option that
  allows the automatic testing of this feature.

  The motivation of this feature is to allow and prepare for true
  CPU-hotplug hardware support: recent changes to MCE support enable us
  to detect a deteriorating but not yet hard-failing L1/L2 cache on a
  CPU that could be soft-unplugged - or a failing L3 cache on a
  multi-socket system.

  Note that true hardware hot-plug is not yet fully enabled by this,
  because that requires a special platform wakeup sequence to be sent to
  the freshly powered up CPU#0.  Future patches for this are planned,
  once such a platform exists.  Chicken and egg"

* 'x86-bsp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, topology: Debug CPU0 hotplug
  x86/i387.c: Initialize thread xstate only on CPU0 only once
  x86, hotplug: Handle retrigger irq by the first available CPU
  x86, hotplug: The first online processor saves the MTRR state
  x86, hotplug: During CPU0 online, enable x2apic, set_numa_node.
  x86, hotplug: Wake up CPU0 via NMI instead of INIT, SIPI, SIPI
  x86-32, hotplug: Add start_cpu0() entry point to head_32.S
  x86-64, hotplug: Add start_cpu0() entry point to head_64.S
  kernel/cpu.c: Add comment for priority in cpu_hotplug_pm_callback
  x86, hotplug, suspend: Online CPU0 for suspend or hibernate
  x86, hotplug: Support functions for CPU0 online/offline
  x86, topology: Don't offline CPU0 if any PIC irq can not be migrated out of it
  x86, Kconfig: Add config switch for CPU0 hotplug
  doc: Add x86 CPU0 online/offline feature
This commit is contained in:
Linus Torvalds
2012-12-11 19:56:33 -08:00
15 changed files with 437 additions and 42 deletions

View File

@@ -127,8 +127,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
atomic_t init_deasserted;
/*
* Report back to the Boot Processor.
* Running on AP.
* Report back to the Boot Processor during boot time or to the caller processor
* during CPU online.
*/
static void __cpuinit smp_callin(void)
{
@@ -140,15 +140,17 @@ static void __cpuinit smp_callin(void)
* we may get here before an INIT-deassert IPI reaches
* our local APIC. We have to wait for the IPI or we'll
* lock up on an APIC access.
*
* Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI.
*/
if (apic->wait_for_init_deassert)
cpuid = smp_processor_id();
if (apic->wait_for_init_deassert && cpuid != 0)
apic->wait_for_init_deassert(&init_deasserted);
/*
* (This works even if the APIC is not enabled.)
*/
phys_id = read_apic_id();
cpuid = smp_processor_id();
if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
phys_id, cpuid);
@@ -230,6 +232,8 @@ static void __cpuinit smp_callin(void)
cpumask_set_cpu(cpuid, cpu_callin_mask);
}
static int cpu0_logical_apicid;
static int enable_start_cpu0;
/*
* Activate a secondary processor.
*/
@@ -245,6 +249,8 @@ notrace static void __cpuinit start_secondary(void *unused)
preempt_disable();
smp_callin();
enable_start_cpu0 = 0;
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
load_cr3(swapper_pg_dir);
@@ -281,19 +287,30 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}
void __init smp_store_boot_cpu_info(void)
{
int id = 0; /* CPU 0 */
struct cpuinfo_x86 *c = &cpu_data(id);
*c = boot_cpu_data;
c->cpu_index = id;
}
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
*/
void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
*c = boot_cpu_data;
c->cpu_index = id;
if (id != 0)
identify_secondary_cpu(c);
/*
* During boot time, CPU0 has this setup already. Save the info when
* bringing up AP or offlined CPU0.
*/
identify_secondary_cpu(c);
}
static bool __cpuinit
@@ -483,7 +500,7 @@ void __inquire_remote_apic(int apicid)
* won't ... remember to clear down the APIC, etc later.
*/
int __cpuinit
wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt;
@@ -491,7 +508,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
/* Target chip */
/* Boot on the stack */
/* Kick the second */
apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -651,6 +668,63 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
node, cpu, apicid);
}
static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
{
int cpu;
cpu = smp_processor_id();
if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
return NMI_HANDLED;
return NMI_DONE;
}
/*
* Wake up AP by INIT, INIT, STARTUP sequence.
*
* Instead of waiting for STARTUP after INITs, BSP will execute the BIOS
* boot-strap code which is not a desired behavior for waking up BSP. To
* void the boot-strap code, wake up CPU0 by NMI instead.
*
* This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined
* (i.e. physically hot removed and then hot added), NMI won't wake it up.
* We'll change this code in the future to wake up hard offlined CPU0 if
* real platform and request are available.
*/
static int __cpuinit
wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
int *cpu0_nmi_registered)
{
int id;
int boot_error;
/*
* Wake up AP by INIT, INIT, STARTUP sequence.
*/
if (cpu)
return wakeup_secondary_cpu_via_init(apicid, start_ip);
/*
* Wake up BSP by nmi.
*
* Register a NMI handler to help wake up CPU0.
*/
boot_error = register_nmi_handler(NMI_LOCAL,
wakeup_cpu0_nmi, 0, "wake_cpu0");
if (!boot_error) {
enable_start_cpu0 = 1;
*cpu0_nmi_registered = 1;
if (apic->dest_logical == APIC_DEST_LOGICAL)
id = cpu0_logical_apicid;
else
id = apicid;
boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
}
return boot_error;
}
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -666,6 +740,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
unsigned long boot_error = 0;
int timeout;
int cpu0_nmi_registered = 0;
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
@@ -713,13 +788,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
}
/*
* Kick the secondary CPU. Use the method in the APIC driver
* if it's defined - or use an INIT boot APIC message otherwise:
* Wake up a CPU in difference cases:
* - Use the method in the APIC driver if it's defined
* Otherwise,
* - Use an INIT boot APIC message for APs or NMI for BSP.
*/
if (apic->wakeup_secondary_cpu)
boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
else
boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
&cpu0_nmi_registered);
if (!boot_error) {
/*
@@ -784,6 +862,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
*/
smpboot_restore_warm_reset_vector();
}
/*
* Clean up the nmi handler. Do this after the callin and callout sync
* to avoid impact of possible long unregister time.
*/
if (cpu0_nmi_registered)
unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
return boot_error;
}
@@ -797,7 +882,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
if (apicid == BAD_APICID ||
!physid_isset(apicid, phys_cpu_present_map) ||
!apic->apic_id_valid(apicid)) {
pr_err("%s: bad cpu %d\n", __func__, cpu);
@@ -995,7 +1080,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
/*
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
smp_store_boot_cpu_info(); /* Final full version of the data */
cpumask_copy(cpu_callin_mask, cpumask_of(0));
mb();
@@ -1031,6 +1116,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
*/
setup_local_APIC();
if (x2apic_mode)
cpu0_logical_apicid = apic_read(APIC_LDR);
else
cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
/*
* Enable IO APIC before setting up error vector
*/
@@ -1219,19 +1309,6 @@ void cpu_disable_common(void)
int native_cpu_disable(void)
{
int cpu = smp_processor_id();
/*
* Perhaps use cpufreq to drop frequency, but that could go
* into generic code.
*
* We won't take down the boot processor on i386 due to some
* interrupts only being able to be serviced by the BSP.
* Especially so if we're not using an IOAPIC -zwane
*/
if (cpu == 0)
return -EBUSY;
clear_local_APIC();
cpu_disable_common();
@@ -1271,6 +1348,14 @@ void play_dead_common(void)
local_irq_disable();
}
static bool wakeup_cpu0(void)
{
if (smp_processor_id() == 0 && enable_start_cpu0)
return true;
return false;
}
/*
* We need to flush the caches before going to sleep, lest we have
* dirty data in our caches when we come back up.
@@ -1334,6 +1419,11 @@ static inline void mwait_play_dead(void)
__monitor(mwait_ptr, 0, 0);
mb();
__mwait(eax, 0);
/*
* If NMI wants to wake up CPU0, start CPU0.
*/
if (wakeup_cpu0())
start_cpu0();
}
}
@@ -1344,6 +1434,11 @@ static inline void hlt_play_dead(void)
while (1) {
native_halt();
/*
* If NMI wants to wake up CPU0, start CPU0.
*/
if (wakeup_cpu0())
start_cpu0();
}
}