Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
 "Misc fixes:

   - fix hotplug bugs
   - fix irq live lock
   - fix various topology handling bugs
   - fix APIC ACK ordering
   - fix PV iopl handling
   - fix speling
   - fix/tweak memcpy_mcsafe() return value
   - fix fbcon bug
   - remove stray prototypes"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/msr: Remove unused native_read_tscp()
  x86/apic: Remove declaration of unused hw_nmi_is_cpu_stuck
  x86/oprofile/nmi: Add missing hotplug FROZEN handling
  x86/hpet: Use proper mask to modify hotplug action
  x86/apic/uv: Fix the hotplug notifier
  x86/apb/timer: Use proper mask to modify hotplug action
  x86/topology: Use total_cpus not nr_cpu_ids for logical packages
  x86/topology: Fix Intel HT disable
  x86/topology: Fix logical package mapping
  x86/irq: Cure live lock in fixup_irqs()
  x86/tsc: Prevent NULL pointer deref in calibrate_delay_is_known()
  x86/apic: Fix suspicious RCU usage in smp_trace_call_function_interrupt()
  x86/iopl: Fix iopl capability check on Xen PV
  x86/iopl/64: Properly context-switch IOPL on Xen PV
  selftests/x86: Add an iopl test
  x86/mm, x86/mce: Fix return type/value for memcpy_mcsafe()
  x86/video: Don't assume all FB devices are PCI devices
  arch/x86/irq: Purge useless handler declarations from hw_irq.h
  x86: Fix misspellings in comments
@@ -956,7 +956,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)

        /*
         * Note that the LAPIC address is obtained from the MADT (32-bit value)
-        * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+        * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
         */

        count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,

@@ -984,7 +984,7 @@ static int __init acpi_parse_madt_lapic_entries(void)

        /*
         * Note that the LAPIC address is obtained from the MADT (32-bit value)
-        * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+        * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
         */

        count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
@@ -221,7 +221,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
        unsigned long cpu = (unsigned long)hcpu;
        struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu);

-       switch (action & 0xf) {
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DEAD:
                dw_apb_clockevent_pause(adev->timer);
                if (system_state == SYSTEM_RUNNING) {
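The "action & ~CPU_TASKS_FROZEN" mask above (and the identical change in the HPET and UV notifiers further down) clears only the suspend/resume flag, so one switch statement handles both CPU_DEAD and CPU_DEAD_FROZEN; the old "action & 0xf" mask only worked as long as every action code happened to fit in four bits. A minimal standalone sketch of that masking, in userspace C with the pre-4.10 notifier constants copied in as assumptions:

#include <stdio.h>

/* Values as found in the old <linux/cpu.h>; restated here as assumptions. */
#define CPU_ONLINE              0x0002
#define CPU_DEAD                0x0007
#define CPU_TASKS_FROZEN        0x0010
#define CPU_DEAD_FROZEN         (CPU_DEAD | CPU_TASKS_FROZEN)

/* Sketch of a notifier body: one case covers the runtime and resume variants. */
static const char *classify(unsigned long action)
{
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DEAD:
                return "cpu dead";
        case CPU_ONLINE:
                return "cpu online";
        default:
                return "ignored";
        }
}

int main(void)
{
        printf("%s\n", classify(CPU_DEAD));             /* "cpu dead" */
        printf("%s\n", classify(CPU_DEAD_FROZEN));      /* "cpu dead" as well */
        return 0;
}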
@@ -1611,7 +1611,7 @@ void __init enable_IR_x2apic(void)
        legacy_pic->mask_all();
        mask_ioapic_entries();

-       /* If irq_remapping_prepare() succeded, try to enable it */
+       /* If irq_remapping_prepare() succeeded, try to enable it */
        if (ir_stat >= 0)
                ir_stat = try_to_enable_IR();
        /* ir_stat contains the remap mode or an error code */
@@ -213,6 +213,7 @@ update:
         */
        cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
        d->move_in_progress = !cpumask_empty(d->old_domain);
+       d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0;
        d->cfg.vector = vector;
        cpumask_copy(d->domain, vector_cpumask);
success:
@@ -655,46 +656,97 @@ void irq_complete_move(struct irq_cfg *cfg)
 }

 /*
- * Called with @desc->lock held and interrupts disabled.
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
  */
 void irq_force_complete_move(struct irq_desc *desc)
 {
        struct irq_data *irqdata = irq_desc_get_irq_data(desc);
        struct apic_chip_data *data = apic_chip_data(irqdata);
        struct irq_cfg *cfg = data ? &data->cfg : NULL;
+       unsigned int cpu;

        if (!cfg)
                return;

-       __irq_complete_move(cfg, cfg->vector);
-
        /*
         * This is tricky. If the cleanup of @data->old_domain has not been
         * done yet, then the following setaffinity call will fail with
         * -EBUSY. This can leave the interrupt in a stale state.
         *
-        * The cleanup cannot make progress because we hold @desc->lock. So in
-        * case @data->old_domain is not yet cleaned up, we need to drop the
-        * lock and acquire it again. @desc cannot go away, because the
-        * hotplug code holds the sparse irq lock.
+        * All CPUs are stuck in stop machine with interrupts disabled so
+        * calling __irq_complete_move() would be completely pointless.
         */
        raw_spin_lock(&vector_lock);
-       /* Clean out all offline cpus (including ourself) first. */
+       /*
+        * Clean out all offline cpus (including the outgoing one) from the
+        * old_domain mask.
+        */
        cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
-       while (!cpumask_empty(data->old_domain)) {
+
+       /*
+        * If move_in_progress is cleared and the old_domain mask is empty,
+        * then there is nothing to cleanup. fixup_irqs() will take care of
+        * the stale vectors on the outgoing cpu.
+        */
+       if (!data->move_in_progress && cpumask_empty(data->old_domain)) {
                raw_spin_unlock(&vector_lock);
-               raw_spin_unlock(&desc->lock);
-               cpu_relax();
-               raw_spin_lock(&desc->lock);
-               /*
-                * Reevaluate apic_chip_data. It might have been cleared after
-                * we dropped @desc->lock.
-                */
-               data = apic_chip_data(irqdata);
-               if (!data)
-                       return;
-               raw_spin_lock(&vector_lock);
+               return;
        }
+
+       /*
+        * 1) The interrupt is in move_in_progress state. That means that we
+        *    have not seen an interrupt since the io_apic was reprogrammed to
+        *    the new vector.
+        *
+        * 2) The interrupt has fired on the new vector, but the cleanup IPIs
+        *    have not been processed yet.
+        */
+       if (data->move_in_progress) {
+               /*
+                * In theory there is a race:
+                *
+                * set_ioapic(new_vector) <-- Interrupt is raised before update
+                *                            is effective, i.e. it's raised on
+                *                            the old vector.
+                *
+                * So if the target cpu cannot handle that interrupt before
+                * the old vector is cleaned up, we get a spurious interrupt
+                * and in the worst case the ioapic irq line becomes stale.
+                *
+                * But in case of cpu hotplug this should be a non issue
+                * because if the affinity update happens right before all
+                * cpus rendevouz in stop machine, there is no way that the
+                * interrupt can be blocked on the target cpu because all cpus
+                * loops first with interrupts enabled in stop machine, so the
+                * old vector is not yet cleaned up when the interrupt fires.
+                *
+                * So the only way to run into this issue is if the delivery
+                * of the interrupt on the apic/system bus would be delayed
+                * beyond the point where the target cpu disables interrupts
+                * in stop machine. I doubt that it can happen, but at least
+                * there is a theroretical chance. Virtualization might be
+                * able to expose this, but AFAICT the IOAPIC emulation is not
+                * as stupid as the real hardware.
+                *
+                * Anyway, there is nothing we can do about that at this point
+                * w/o refactoring the whole fixup_irq() business completely.
+                * We print at least the irq number and the old vector number,
+                * so we have the necessary information when a problem in that
+                * area arises.
+                */
+               pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
+                       irqdata->irq, cfg->old_vector);
+       }
+       /*
+        * If old_domain is not empty, then other cpus still have the irq
+        * descriptor set in their vector array. Clean it up.
+        */
+       for_each_cpu(cpu, data->old_domain)
+               per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED;
+
+       /* Cleanup the left overs of the (half finished) move */
+       cpumask_clear(data->old_domain);
+       data->move_in_progress = 0;
        raw_spin_unlock(&vector_lock);
 }
 #endif
@@ -792,7 +792,8 @@ static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action,
 {
        long cpu = (long)hcpu;

-       switch (action) {
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_DOWN_FAILED:
        case CPU_ONLINE:
                uv_heartbeat_enable(cpu);
                break;
@@ -860,7 +861,7 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode,
  */
 void uv_cpu_init(void)
 {
-       /* CPU 0 initilization will be done via uv_system_init. */
+       /* CPU 0 initialization will be done via uv_system_init. */
        if (!uv_blade_info)
                return;

@@ -1088,7 +1088,7 @@ static int apm_get_battery_status(u_short which, u_short *status,
 * @device: identity of device
 * @enable: on/off
 *
- * Activate or deactive power management on either a specific device
+ * Activate or deactivate power management on either a specific device
 * or the entire system (%APM_DEVICE_ALL).
 */

@@ -85,7 +85,7 @@ static void init_amd_k5(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_32
        /*
         * General Systems BIOSen alias the cpu frequency registers
-        * of the Elan at 0x000df000. Unfortuantly, one of the Linux
+        * of the Elan at 0x000df000. Unfortunately, one of the Linux
         * drivers subsequently pokes it, and changes the CPU speed.
         * Workaround : Remove the unneeded alias.
         */
@@ -968,7 +968,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
        if (this_cpu->c_identify)
                this_cpu->c_identify(c);

-       /* Clear/Set all flags overriden by options, after probe */
+       /* Clear/Set all flags overridden by options, after probe */
        for (i = 0; i < NCAPINTS; i++) {
                c->x86_capability[i] &= ~cpu_caps_cleared[i];
                c->x86_capability[i] |= cpu_caps_set[i];

@@ -1028,7 +1028,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)

        setup_pku(c);

        /*
-        * Clear/Set all flags overriden by options, need do it
+        * Clear/Set all flags overridden by options, need do it
         * before following smp all cpus cap AND.
         */
        for (i = 0; i < NCAPINTS; i++) {
@@ -42,7 +42,7 @@ EXPORT_SYMBOL_GPL(mtrr_state);
 * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
 * Opteron Processors" (26094 Rev. 3.30 February 2006), section
 * "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set
- * to 1 during BIOS initalization of the fixed MTRRs, then cleared to
+ * to 1 during BIOS initialization of the fixed MTRRs, then cleared to
 * 0 for operation."
 */
static inline void k8_check_syscfg_dram_mod_en(void)
@@ -287,7 +287,7 @@ static __init void early_pci_serial_init(char *s)
        }

        /*
-        * Lastly, initalize the hardware
+        * Lastly, initialize the hardware
         */
        if (*s) {
                if (strcmp(s, "nocfg") == 0)
@@ -8,7 +8,7 @@
 /*
 * The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
 * as the "regset->n" for the xstate regset will be updated based on the feature
- * capabilites supported by the xsave.
+ * capabilities supported by the xsave.
 */
int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
@@ -717,7 +717,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
        struct hpet_work_struct work;
        struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);

-       switch (action & 0xf) {
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
                INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
                init_completion(&work.complete);
@@ -96,9 +96,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 SYSCALL_DEFINE1(iopl, unsigned int, level)
 {
        struct pt_regs *regs = current_pt_regs();
-       unsigned int old = (regs->flags >> 12) & 3;
        struct thread_struct *t = &current->thread;

+       /*
+        * Careful: the IOPL bits in regs->flags are undefined under Xen PV
+        * and changing them has no effect.
+        */
+       unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT;
+
        if (level > 3)
                return -EINVAL;
        /* Trying to gain more privileges? */

@@ -106,8 +111,9 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)

                if (!capable(CAP_SYS_RAWIO))
                        return -EPERM;
        }
-       regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
-       t->iopl = level << 12;
+       regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
+               (level << X86_EFLAGS_IOPL_BIT);
+       t->iopl = level << X86_EFLAGS_IOPL_BIT;
        set_iopl_mask(t->iopl);

        return 0;
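The two hunks above rely on the EFLAGS encoding of the I/O privilege level: bits 13:12 of EFLAGS hold IOPL, so X86_EFLAGS_IOPL_BIT is 12 and the field mask is 3 << 12, and thread.iopl now caches the level in that same shifted format so __switch_to() can later hand it to xen_set_iopl_mask() on Xen PV. A small standalone sketch of the bit manipulation, in userspace C with the constants restated as assumptions:

#include <stdio.h>

/* EFLAGS IOPL field, restated here as assumptions. */
#define X86_EFLAGS_IOPL_BIT     12
#define X86_EFLAGS_IOPL         (3UL << X86_EFLAGS_IOPL_BIT)

int main(void)
{
        unsigned long flags = 0x246;    /* typical user EFLAGS, IOPL == 0 */
        unsigned int level = 3;

        /* Replace the IOPL field, as the syscall does for regs->flags. */
        flags = (flags & ~X86_EFLAGS_IOPL) |
                ((unsigned long)level << X86_EFLAGS_IOPL_BIT);

        printf("flags=%#lx iopl=%lu\n",
               flags, (flags >> X86_EFLAGS_IOPL_BIT) & 3);
        return 0;
}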
@@ -271,7 +271,7 @@ static int bzImage64_probe(const char *buf, unsigned long len)
        int ret = -ENOEXEC;
        struct setup_header *header;

-       /* kernel should be atleast two sectors long */
+       /* kernel should be at least two sectors long */
        if (len < 2 * 512) {
                pr_err("File is too short to be a bzImage\n");
                return ret;
@@ -609,9 +609,9 @@ static struct notifier_block kgdb_notifier = {
 };

 /**
- * kgdb_arch_init - Perform any architecture specific initalization.
+ * kgdb_arch_init - Perform any architecture specific initialization.
  *
- * This function will handle the initalization of any architecture
+ * This function will handle the initialization of any architecture
  * specific callbacks.
  */
 int kgdb_arch_init(void)
@@ -226,7 +226,7 @@ static void kvm_setup_secondary_clock(void)
 * registered memory location. If the guest happens to shutdown, this memory
 * won't be valid. In cases like kexec, in which you install a new kernel, this
 * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writting anything
+ * kind of shutdown from our side, we unregister the clock by writing anything
 * that does not have the 'enable' bit set in the msr
 */
#ifdef CONFIG_KEXEC_CORE
@@ -48,6 +48,7 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>
+#include <asm/xen/hypervisor.h>

 asmlinkage extern void ret_from_fork(void);

@@ -413,6 +414,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)

                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

+#ifdef CONFIG_XEN
+       /*
+        * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
+        * current_pt_regs()->flags may not match the current task's
+        * intended IOPL. We need to switch it manually.
+        */
+       if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
+                    prev->iopl != next->iopl))
+               xen_set_iopl_mask(next->iopl);
+#endif
+
        if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
                /*
                 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
@@ -274,11 +274,6 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu)
        if (test_and_set_bit(pkg, physical_package_map))
                goto found;

-       if (pkg < __max_logical_packages) {
-               set_bit(pkg, logical_package_map);
-               physical_to_logical_pkg[pkg] = pkg;
-               goto found;
-       }
        new = find_first_zero_bit(logical_package_map, __max_logical_packages);
        if (new >= __max_logical_packages) {
                physical_to_logical_pkg[pkg] = -1;

@@ -317,9 +312,27 @@ static void __init smp_init_package_map(void)

        /*
         * Today neither Intel nor AMD support heterogenous systems. That
         * might change in the future....
+        *
+        * While ideally we'd want '* smp_num_siblings' in the below @ncpus
+        * computation, this won't actually work since some Intel BIOSes
+        * report inconsistent HT data when they disable HT.
+        *
+        * In particular, they reduce the APIC-IDs to only include the cores,
+        * but leave the CPUID topology to say there are (2) siblings.
+        * This means we don't know how many threads there will be until
+        * after the APIC enumeration.
+        *
+        * By not including this we'll sometimes over-estimate the number of
+        * logical packages by the amount of !present siblings, but this is
+        * still better than MAX_LOCAL_APIC.
+        *
+        * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
+        * on the command line leading to a similar issue as the HT disable
+        * problem because the hyperthreads are usually enumerated after the
+        * primary cores.
         */
-       ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings;
-       __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+       ncpus = boot_cpu_data.x86_max_cores;
+       __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);

        /*
         * Possibly larger than what we need as the number of apic ids per
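The comment above explains the over-estimate; a worked example on a hypothetical machine (numbers invented for illustration) shows why dividing total_cpus by cores beats dividing nr_cpu_ids by cores * siblings when the BIOS disables HT but still reports two siblings:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

int main(void)
{
        /* 2-socket box, 10 cores per socket, HT disabled in the BIOS:
         * only 20 CPUs are enumerated, but CPUID still claims 2 siblings. */
        unsigned int x86_max_cores = 10, smp_num_siblings = 2;
        unsigned int total_cpus = 20, nr_cpu_ids = 20;

        /* Old formula: 20 / (10 * 2) == 1 logical package -- too small. */
        printf("old: %u\n",
               DIV_ROUND_UP(nr_cpu_ids, x86_max_cores * smp_num_siblings));

        /* New formula: 20 / 10 == 2 logical packages -- matches the hardware. */
        printf("new: %u\n", DIV_ROUND_UP(total_cpus, x86_max_cores));
        return 0;
}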
@@ -881,7 +881,7 @@ void tsc_restore_sched_clock_state(void)
        local_irq_save(flags);

        /*
-        * We're comming out of suspend, there's no concurrency yet; don't
+        * We're coming out of suspend, there's no concurrency yet; don't
         * bother being nice about the RCU stuff, just write to both
         * data fields.
         */

@@ -1306,11 +1306,15 @@ void __init tsc_init(void)

 unsigned long calibrate_delay_is_known(void)
 {
        int sibling, cpu = smp_processor_id();
+       struct cpumask *mask = topology_core_cpumask(cpu);

        if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
                return 0;

-       sibling = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+       if (!mask)
+               return 0;
+
+       sibling = cpumask_any_but(mask, cpu);
        if (sibling < nr_cpu_ids)
                return cpu_data(sibling).loops_per_jiffy;
        return 0;
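The hunk above reuses a core sibling's loops_per_jiffy instead of recalibrating when the TSC is constant, and now bails out if the sibling mask is not set up yet. A standalone sketch of that decision, in userspace C with all names and numbers hypothetical:

#include <stdio.h>

#define NR_CPUS 4

/* Hypothetical per-CPU data; only cpu0 has been calibrated so far. */
static unsigned long loops_per_jiffy[NR_CPUS] = { 4000000UL, 0, 0, 0 };

/*
 * Return an already-calibrated core sibling of @cpu, or NR_CPUS if the
 * sibling mask is not available yet -- the case the new NULL check guards.
 */
static unsigned int calibrated_sibling(unsigned int cpu, const int *mask)
{
        if (!mask)
                return NR_CPUS;
        for (unsigned int i = 0; i < NR_CPUS; i++)
                if (mask[i] && i != cpu && loops_per_jiffy[i])
                        return i;
        return NR_CPUS;
}

int main(void)
{
        /* cpu0 and cpu2 share a core in this made-up topology. */
        const int siblings_of_cpu2[NR_CPUS] = { 1, 0, 1, 0 };
        unsigned int s;

        s = calibrated_sibling(2, siblings_of_cpu2);    /* reuses cpu0's value */
        printf("%s\n", s < NR_CPUS ? "reuse sibling lpj" : "calibrate from scratch");

        s = calibrated_sibling(1, NULL);                /* topology not set up yet */
        printf("%s\n", s < NR_CPUS ? "reuse sibling lpj" : "calibrate from scratch");
        return 0;
}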