Merge branch 'master' into for-next
Conflicts:
	drivers/devfreq/exynos4_bus.c

Sync with Linus' tree to be able to apply patches that are against newer code (mvneta).
@@ -9,7 +9,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_pvclock.o = -pg
CFLAGS_REMOVE_kvmclock.o = -pg
@@ -62,6 +61,7 @@ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_X86_TSC) += trace_clock.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o

@@ -574,6 +574,12 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)

return irq;
}
EXPORT_SYMBOL_GPL(acpi_register_gsi);

void acpi_unregister_gsi(u32 gsi)
{
}
EXPORT_SYMBOL_GPL(acpi_unregister_gsi);

void __init acpi_set_irq_model_pic(void)
{
@@ -1700,3 +1706,9 @@ int __acpi_release_global_lock(unsigned int *lock)
} while (unlikely (val != old));
return old & 0x1;
}

void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
{
e820_add_region(addr, size, E820_ACPI);
update_e820();
}

@@ -101,6 +101,8 @@ static int __init acpi_sleep_setup(char *str)
#endif
if (strncmp(str, "nonvs", 5) == 0)
acpi_nvs_nosave();
if (strncmp(str, "nonvs_s3", 8) == 0)
acpi_nvs_nosave_s3();
if (strncmp(str, "old_ordering", 12) == 0)
acpi_old_suspend_ordering();
str = strchr(str, ',');

@@ -90,21 +90,6 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
*/
DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);

/*
* Knob to control our willingness to enable the local APIC.
*
* +1=force-enable
*/
static int force_enable_local_apic __initdata;
/*
* APIC command line parameters
*/
static int __init parse_lapic(char *arg)
{
force_enable_local_apic = 1;
return 0;
}
early_param("lapic", parse_lapic);
/* Local APIC was disabled by the BIOS and enabled by the kernel */
static int enabled_via_apicbase;

@@ -133,6 +118,25 @@ static inline void imcr_apic_to_pic(void)
}
#endif

/*
* Knob to control our willingness to enable the local APIC.
*
* +1=force-enable
*/
static int force_enable_local_apic __initdata;
/*
* APIC command line parameters
*/
static int __init parse_lapic(char *arg)
{
if (config_enabled(CONFIG_X86_32) && !arg)
force_enable_local_apic = 1;
else if (!strncmp(arg, "notscdeadline", 13))
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return 0;
}
early_param("lapic", parse_lapic);

#ifdef CONFIG_X86_64
static int apic_calibrate_pmtmr __initdata;
static __init int setup_apicpmtimer(char *s)
@@ -315,6 +319,7 @@ int lapic_get_maxlvt(void)

/* Clock divisor */
#define APIC_DIVISOR 16
#define TSC_DIVISOR 32

/*
* This function sets up the local APIC timer, with a timeout of
@@ -333,6 +338,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
lvtt_value = LOCAL_TIMER_VECTOR;
if (!oneshot)
lvtt_value |= APIC_LVT_TIMER_PERIODIC;
else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;

if (!lapic_is_integrated())
lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);

@@ -341,6 +349,11 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)

apic_write(APIC_LVTT, lvtt_value);

if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}

/*
* Divide PICLK by 16
*/
@@ -453,6 +466,16 @@ static int lapic_next_event(unsigned long delta,
return 0;
}

static int lapic_next_deadline(unsigned long delta,
struct clock_event_device *evt)
{
u64 tsc;

rdtscll(tsc);
wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
return 0;
}

/*
* Setup the lapic timer in periodic or oneshot mode
*/
@@ -533,7 +556,15 @@ static void __cpuinit setup_APIC_timer(void)
memcpy(levt, &lapic_clockevent, sizeof(*levt));
levt->cpumask = cpumask_of(smp_processor_id());

clockevents_register_device(levt);
if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_DUMMY);
levt->set_next_event = lapic_next_deadline;
clockevents_config_and_register(levt,
(tsc_khz / TSC_DIVISOR) * 1000,
0xF, ~0UL);
} else
clockevents_register_device(levt);
}

/*
@@ -661,7 +692,9 @@ static int __init calibrate_APIC_clock(void)
* in the clockevent structure and return.
*/

if (lapic_timer_frequency) {
if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
return 0;
} else if (lapic_timer_frequency) {
apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
lapic_timer_frequency);
lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
@@ -674,6 +707,9 @@ static int __init calibrate_APIC_clock(void)
return 0;
}

apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
"calibrating APIC timer ...\n");

local_irq_disable();

/* Replace the global interrupt handler */
@@ -811,9 +847,6 @@ void __init setup_boot_APIC_clock(void)
return;
}

apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
"calibrating APIC timer ...\n");

if (calibrate_APIC_clock()) {
/* No broadcast on UP ! */
if (num_possible_cpus() > 1)

@@ -22,6 +22,7 @@
#include <linux/hardirq.h>
#include <linux/delay.h>

#include <asm/numachip/numachip.h>
#include <asm/numachip/numachip_csr.h>
#include <asm/smp.h>
#include <asm/apic.h>
@@ -179,6 +180,7 @@ static int __init numachip_system_init(void)
return 0;

x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
x86_init.pci.arch_init = pci_numachip_init;

map_csrs();

@@ -234,11 +234,11 @@ int __init arch_early_irq_init(void)
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
/*
* For legacy IRQ's, start with assigning irq0 to irq15 to
* IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
* IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
*/
if (i < legacy_pic->nr_legacy_irqs) {
cfg[i].vector = IRQ0_VECTOR + i;
cpumask_set_cpu(0, cfg[i].domain);
cpumask_setall(cfg[i].domain);
}
}

@@ -1141,7 +1141,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
* allocation for the members that are not used anymore.
*/
cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
cfg->move_in_progress = 1;
cfg->move_in_progress =
cpumask_intersects(cfg->old_domain, cpu_online_mask);
cpumask_and(cfg->domain, cfg->domain, tmp_mask);
break;
}
@@ -1172,8 +1173,9 @@ next:
current_vector = vector;
current_offset = offset;
if (cfg->vector) {
cfg->move_in_progress = 1;
cpumask_copy(cfg->old_domain, cfg->domain);
cfg->move_in_progress =
cpumask_intersects(cfg->old_domain, cpu_online_mask);
}
for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq;
@@ -1241,12 +1243,6 @@ void __setup_vector_irq(int cpu)
cfg = irq_get_chip_data(irq);
if (!cfg)
continue;
/*
* If it is a legacy IRQ handled by the legacy PIC, this cpu
* will be part of the irq_cfg's domain.
*/
if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
cpumask_set_cpu(cpu, cfg->domain);

if (!cpumask_test_cpu(cpu, cfg->domain))
continue;
@@ -1356,16 +1352,6 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
if (!IO_APIC_IRQ(irq))
return;

/*
* For legacy irqs, cfg->domain starts with cpu 0. Now that IO-APIC
* can handle this irq and the apic driver is finialized at this point,
* update the cfg->domain.
*/
if (irq < legacy_pic->nr_legacy_irqs &&
cpumask_equal(cfg->domain, cpumask_of(0)))
apic->vector_allocation_domain(0, cfg->domain,
apic->target_cpus());

if (assign_irq_vector(irq, cfg, apic->target_cpus()))
return;

@@ -2199,9 +2185,11 @@ static int ioapic_retrigger_irq(struct irq_data *data)
{
struct irq_cfg *cfg = data->chip_data;
unsigned long flags;
int cpu;

raw_spin_lock_irqsave(&vector_lock, flags);
apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
raw_spin_unlock_irqrestore(&vector_lock, flags);

return 1;
@@ -3317,8 +3305,9 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
int ret;

if (irq_remapping_enabled) {
if (!setup_hpet_msi_remapped(irq, id))
return -1;
ret = setup_hpet_msi_remapped(irq, id);
if (ret)
return ret;
}

ret = msi_compose_msg(NULL, irq, &msg, id);

@@ -304,7 +304,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
int cpu = smp_processor_id();

/* get information required for multi-node processors */
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (cpu_has_topoext) {
u32 eax, ebx, ecx, edx;

cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -631,6 +631,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
}

/*
* The way access filter has a performance penalty on some workloads.
* Disable it on the affected CPUs.
*/
if ((c->x86 == 0x15) &&
(c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
u64 val;

if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) {
val |= 0x1E;
wrmsrl_safe(0xc0011021, val);
}
}

cpu_detect_cache_sizes(c);

/* Multi core CPU? */
@@ -643,12 +657,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
detect_ht(c);
#endif

if (c->extended_cpuid_level >= 0x80000006) {
if (cpuid_edx(0x80000006) & 0xf000)
num_cache_leaves = 4;
else
num_cache_leaves = 3;
}
init_amd_cacheinfo(c);

if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
@@ -739,9 +748,6 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,

static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
{
if (!cpu_has_invlpg)
return;

tlb_flushall_shift = 5;

if (c->x86 <= 0x11)

@@ -106,54 +106,18 @@ static void __init check_hlt(void)
pr_cont("OK\n");
}

/*
* Most 386 processors have a bug where a POPAD can lock the
* machine even from user space.
*/

static void __init check_popad(void)
{
#ifndef CONFIG_X86_POPAD_OK
int res, inp = (int) &res;

pr_info("Checking for popad bug... ");
__asm__ __volatile__(
"movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
: "=&a" (res)
: "d" (inp)
: "ecx", "edi");
/*
* If this fails, it means that any user program may lock the
* CPU hard. Too bad.
*/
if (res != 12345678)
pr_cont("Buggy\n");
else
pr_cont("OK\n");
#endif
}

/*
* Check whether we are able to run this kernel safely on SMP.
*
* - In order to run on a i386, we need to be compiled for i386
* (for due to lack of "invlpg" and working WP on a i386)
* - i386 is no longer supported.
* - In order to run on anything without a TSC, we need to be
* compiled for a i486.
*/

static void __init check_config(void)
{
/*
* We'd better not be a i386 if we're configured to use some
* i486+ only features! (WP works in supervisor mode and the
* new "invlpg" and "bswap" instructions)
*/
#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
defined(CONFIG_X86_BSWAP)
if (boot_cpu_data.x86 == 3)
if (boot_cpu_data.x86 < 4)
panic("Kernel requires i486+ for 'invlpg' and other features");
#endif
}

@@ -166,7 +130,6 @@ void __init check_bugs(void)
#endif
check_config();
check_hlt();
check_popad();
init_utsname()->machine[1] =
'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
alternative_instructions();

@@ -1173,15 +1173,6 @@ DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif

/* Make sure %fs and %gs are initialized properly in idle threads */
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU;
regs->gs = __KERNEL_STACK_CANARY;

return regs;
}
#endif /* CONFIG_X86_64 */

/*
@@ -1237,7 +1228,7 @@ void __cpuinit cpu_init(void)
oist = &per_cpu(orig_ist, cpu);

#ifdef CONFIG_NUMA
if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
if (this_cpu_read(numa_node) == 0 &&
early_cpu_to_node(cpu) != NUMA_NO_NODE)
set_numa_node(early_cpu_to_node(cpu));
#endif
@@ -1269,8 +1260,7 @@ void __cpuinit cpu_init(void)
barrier();

x86_configure_nx();
if (cpu != 0)
enable_x2apic();
enable_x2apic();

/*
* set up and load the per-CPU TSS

@@ -612,10 +612,6 @@ static void __cpuinit intel_tlb_lookup(const unsigned char desc)

static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
{
if (!cpu_has_invlpg) {
tlb_flushall_shift = -1;
return;
}
switch ((c->x86 << 8) + c->x86_model) {
case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */

@@ -538,7 +538,11 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
unsigned edx;

if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
if (cpu_has_topoext)
cpuid_count(0x8000001d, index, &eax.full,
&ebx.full, &ecx.full, &edx);
else
amd_cpuid4(index, &eax, &ebx, &ecx);
amd_init_l3_cache(this_leaf, index);
} else {
cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
@@ -557,21 +561,39 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
return 0;
}

static int __cpuinit find_num_cache_leaves(void)
static int __cpuinit find_num_cache_leaves(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx;
unsigned int eax, ebx, ecx, edx, op;
union _cpuid4_leaf_eax cache_eax;
int i = -1;

if (c->x86_vendor == X86_VENDOR_AMD)
op = 0x8000001d;
else
op = 4;

do {
++i;
/* Do cpuid(4) loop to find out num_cache_leaves */
cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
/* Do cpuid(op) loop to find out num_cache_leaves */
cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
cache_eax.full = eax;
} while (cache_eax.split.type != CACHE_TYPE_NULL);
return i;
}

void __cpuinit init_amd_cacheinfo(struct cpuinfo_x86 *c)
{

if (cpu_has_topoext) {
num_cache_leaves = find_num_cache_leaves(c);
} else if (c->extended_cpuid_level >= 0x80000006) {
if (cpuid_edx(0x80000006) & 0xf000)
num_cache_leaves = 4;
else
num_cache_leaves = 3;
}
}

unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
/* Cache sizes */
@@ -588,7 +610,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)

if (is_initialized == 0) {
/* Init num_cache_leaves from boot CPU */
num_cache_leaves = find_num_cache_leaves();
num_cache_leaves = find_num_cache_leaves(c);
is_initialized++;
}

@@ -728,12 +750,36 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
{
struct _cpuid4_info *this_leaf;
int ret, i, sibling;
struct cpuinfo_x86 *c = &cpu_data(cpu);
int i, sibling;

ret = 0;
if (index == 3) {
ret = 1;
if (cpu_has_topoext) {
unsigned int apicid, nshared, first, last;

if (!per_cpu(ici_cpuid4_info, cpu))
return 0;

this_leaf = CPUID4_INFO_IDX(cpu, index);
nshared = this_leaf->base.eax.split.num_threads_sharing + 1;
apicid = cpu_data(cpu).apicid;
first = apicid - (apicid % nshared);
last = first + nshared - 1;

for_each_online_cpu(i) {
apicid = cpu_data(i).apicid;
if ((apicid < first) || (apicid > last))
continue;
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);

for_each_online_cpu(sibling) {
apicid = cpu_data(sibling).apicid;
if ((apicid < first) || (apicid > last))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
} else if (index == 3) {
for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
@@ -744,21 +790,10 @@ static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
} else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
ret = 1;
for_each_cpu(i, cpu_sibling_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);
for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
if (!cpu_online(sibling))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
}
} else
return 0;

return ret;
return 1;
}

static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)

@@ -24,8 +24,6 @@ struct mce_bank {
int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void);

extern int mce_ser;

extern struct mce_bank *mce_banks;

#ifdef CONFIG_X86_MCE_INTEL

@@ -193,9 +193,9 @@ int mce_severity(struct mce *m, int tolerant, char **msg)
continue;
if ((m->mcgstatus & s->mcgmask) != s->mcgres)
continue;
if (s->ser == SER_REQUIRED && !mce_ser)
if (s->ser == SER_REQUIRED && !mca_cfg.ser)
continue;
if (s->ser == NO_SER && mce_ser)
if (s->ser == NO_SER && mca_cfg.ser)
continue;
if (s->context && ctx != s->context)
continue;

@@ -58,34 +58,26 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>

int mce_disabled __read_mostly;

#define SPINUNIT 100 /* 100ns */

atomic_t mce_entry;

DEFINE_PER_CPU(unsigned, mce_exception_count);

/*
* Tolerant levels:
* 0: always panic on uncorrected errors, log corrected errors
* 1: panic or SIGBUS on uncorrected errors, log corrected errors
* 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
* 3: never panic or SIGBUS, log all errors (for testing only)
*/
static int tolerant __read_mostly = 1;
static int banks __read_mostly;
static int rip_msr __read_mostly;
static int mce_bootlog __read_mostly = -1;
static int monarch_timeout __read_mostly = -1;
static int mce_panic_timeout __read_mostly;
static int mce_dont_log_ce __read_mostly;
int mce_cmci_disabled __read_mostly;
int mce_ignore_ce __read_mostly;
int mce_ser __read_mostly;
int mce_bios_cmci_threshold __read_mostly;
struct mce_bank *mce_banks __read_mostly;

struct mce_bank *mce_banks __read_mostly;
struct mca_config mca_cfg __read_mostly = {
.bootlog = -1,
/*
* Tolerant levels:
* 0: always panic on uncorrected errors, log corrected errors
* 1: panic or SIGBUS on uncorrected errors, log corrected errors
* 2: SIGBUS or log uncorrected errors (if possible), log corr. errors
* 3: never panic or SIGBUS, log all errors (for testing only)
*/
.tolerant = 1,
.monarch_timeout = -1
};

/* User mode helper program triggered by machine check event */
static unsigned long mce_need_notify;
@@ -302,7 +294,7 @@ static void wait_for_panic(void)
while (timeout-- > 0)
udelay(1);
if (panic_timeout == 0)
panic_timeout = mce_panic_timeout;
panic_timeout = mca_cfg.panic_timeout;
panic("Panicing machine check CPU died");
}

@@ -360,7 +352,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
pr_emerg(HW_ERR "Machine check: %s\n", exp);
if (!fake_panic) {
if (panic_timeout == 0)
panic_timeout = mce_panic_timeout;
panic_timeout = mca_cfg.panic_timeout;
panic(msg);
} else
pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
@@ -372,7 +364,7 @@ static int msr_to_offset(u32 msr)
{
unsigned bank = __this_cpu_read(injectm.bank);

if (msr == rip_msr)
if (msr == mca_cfg.rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
return offsetof(struct mce, status);
@@ -451,8 +443,8 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
m->cs |= 3;
}
/* Use accurate RIP reporting if available. */
if (rip_msr)
m->ip = mce_rdmsrl(rip_msr);
if (mca_cfg.rip_msr)
m->ip = mce_rdmsrl(mca_cfg.rip_msr);
}
}

@@ -513,7 +505,7 @@ static int mce_ring_add(unsigned long pfn)

int mce_available(struct cpuinfo_x86 *c)
{
if (mce_disabled)
if (mca_cfg.disabled)
return 0;
return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}
@@ -565,7 +557,7 @@ static void mce_read_aux(struct mce *m, int i)
/*
* Mask the reported address by the reported granularity.
*/
if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) {
u8 shift = MCI_MISC_ADDR_LSB(m->misc);
m->addr >>= shift;
m->addr <<= shift;
@@ -599,7 +591,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)

mce_gather_info(&m, NULL);

for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;

@@ -620,7 +612,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* TBD do the same check for MCI_STATUS_EN here?
*/
if (!(flags & MCP_UC) &&
(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
(m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;

mce_read_aux(&m, i);
@@ -631,7 +623,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
mce_log(&m);

/*
@@ -658,14 +650,14 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
{
int i, ret = 0;

for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
if (quirk_no_way_out)
quirk_no_way_out(i, m, regs);
}
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY)
ret = 1;
}
return ret;
@@ -696,11 +688,11 @@ static int mce_timed_out(u64 *t)
rmb();
if (atomic_read(&mce_paniced))
wait_for_panic();
if (!monarch_timeout)
if (!mca_cfg.monarch_timeout)
goto out;
if ((s64)*t < SPINUNIT) {
/* CHECKME: Make panic default for 1 too? */
if (tolerant < 1)
if (mca_cfg.tolerant < 1)
mce_panic("Timeout synchronizing machine check over CPUs",
NULL, NULL);
cpu_missing = 1;
@@ -750,7 +742,8 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant,
int severity = mce_severity(&per_cpu(mces_seen, cpu),
mca_cfg.tolerant,
&nmsg);
if (severity > global_worst) {
msg = nmsg;
@@ -764,7 +757,7 @@ static void mce_reign(void)
* This dumps all the mces in the log buffer and stops the
* other CPUs.
*/
if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3)
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
mce_panic("Fatal Machine check", m, msg);

/*
@@ -777,7 +770,7 @@ static void mce_reign(void)
* No machine check event found. Must be some external
* source or one CPU is hung. Panic.
*/
if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
mce_panic("Machine check from unknown source", NULL, NULL);

/*
@@ -801,7 +794,7 @@ static int mce_start(int *no_way_out)
{
int order;
int cpus = num_online_cpus();
u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;

if (!timeout)
return -1;
@@ -865,7 +858,7 @@ static int mce_start(int *no_way_out)
static int mce_end(int order)
{
int ret = -1;
u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;

if (!timeout)
goto reset;
@@ -946,7 +939,7 @@ static void mce_clear_state(unsigned long *toclear)
{
int i;

for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
if (test_bit(i, toclear))
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
}
@@ -1011,6 +1004,7 @@ static void mce_clear_info(struct mce_info *mi)
*/
void do_machine_check(struct pt_regs *regs, long error_code)
{
struct mca_config *cfg = &mca_cfg;
struct mce m, *final;
int i;
int worst = 0;
@@ -1022,7 +1016,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int order;
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If tolerant is cranked up, we'll try anyway.
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
*/
int no_way_out = 0;
/*
@@ -1038,7 +1032,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)

this_cpu_inc(mce_exception_count);

if (!banks)
if (!cfg->banks)
goto out;

mce_gather_info(&m, regs);
@@ -1065,7 +1059,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* because the first one to see it will clear it.
*/
order = mce_start(&no_way_out);
for (i = 0; i < banks; i++) {
for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
if (!test_bit(i, valid_banks))
continue;
@@ -1084,7 +1078,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* Non uncorrected or non signaled errors are handled by
* machine_check_poll. Leave them alone, unless this panics.
*/
if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
!no_way_out)
continue;

@@ -1093,7 +1087,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*/
add_taint(TAINT_MACHINE_CHECK);

severity = mce_severity(&m, tolerant, NULL);
severity = mce_severity(&m, cfg->tolerant, NULL);

/*
* When machine check was for corrected handler don't touch,
@@ -1117,7 +1111,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* When the ring overflows we just ignore the AO error.
* RED-PEN add some logging mechanism when
* usable_address or mce_add_ring fails.
* RED-PEN don't ignore overflow for tolerant == 0
* RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
*/
if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
mce_ring_add(m.addr >> PAGE_SHIFT);
@@ -1149,7 +1143,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* issues we try to recover, or limit damage to the current
* process.
*/
if (tolerant < 3) {
if (cfg->tolerant < 3) {
if (no_way_out)
mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst == MCE_AR_SEVERITY) {
@@ -1377,11 +1371,13 @@ EXPORT_SYMBOL_GPL(mce_notify_irq);
static int __cpuinit __mcheck_cpu_mce_banks_init(void)
{
int i;
u8 num_banks = mca_cfg.banks;

mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
if (!mce_banks)
return -ENOMEM;
for (i = 0; i < banks; i++) {

for (i = 0; i < num_banks; i++) {
struct mce_bank *b = &mce_banks[i];

b->ctl = -1ULL;
@@ -1401,7 +1397,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);

b = cap & MCG_BANKCNT_MASK;
if (!banks)
if (!mca_cfg.banks)
pr_info("CPU supports %d MCE banks\n", b);

if (b > MAX_NR_BANKS) {
@@ -1411,8 +1407,9 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
}

/* Don't support asymmetric configurations today */
WARN_ON(banks != 0 && b != banks);
banks = b;
WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks);
mca_cfg.banks = b;

if (!mce_banks) {
int err = __mcheck_cpu_mce_banks_init();

@@ -1422,25 +1419,29 @@ static int __cpuinit __mcheck_cpu_cap_init(void)

/* Use accurate RIP reporting if available. */
if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
rip_msr = MSR_IA32_MCG_EIP;
mca_cfg.rip_msr = MSR_IA32_MCG_EIP;

if (cap & MCG_SER_P)
mce_ser = 1;
mca_cfg.ser = true;

return 0;
}

static void __mcheck_cpu_init_generic(void)
{
enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
u64 cap;
int i;

if (!mca_cfg.bootlog)
m_fl = MCP_DONTLOG;

/*
* Log the machine checks left over from the previous reset.
*/
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
machine_check_poll(MCP_UC | m_fl, &all_banks);

set_in_cr4(X86_CR4_MCE);

@@ -1448,7 +1449,7 @@ static void __mcheck_cpu_init_generic(void)
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);

for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];

if (!b->init)
@@ -1489,6 +1490,8 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
/* Add per CPU specific workarounds here */
static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
struct mca_config *cfg = &mca_cfg;

if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("unknown CPU type - not enabling MCE support\n");
return -EOPNOTSUPP;
@@ -1496,7 +1499,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)

/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD) {
if (c->x86 == 15 && banks > 4) {
if (c->x86 == 15 && cfg->banks > 4) {
/*
* disable GART TBL walk error reporting, which
* trips off incorrectly with the IOMMU & 3ware
@@ -1504,18 +1507,18 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
if (c->x86 <= 17 && mce_bootlog < 0) {
if (c->x86 <= 17 && cfg->bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
* by default and leave crap in there. Don't log:
*/
mce_bootlog = 0;
cfg->bootlog = 0;
}
/*
* Various K7s with broken bank 0 around. Always disable
* by default.
*/
if (c->x86 == 6 && banks > 0)
if (c->x86 == 6 && cfg->banks > 0)
mce_banks[0].ctl = 0;

/*
@@ -1566,7 +1569,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* valid event later, merely don't write CTL0.
*/

if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
mce_banks[0].init = 0;

/*
@@ -1574,23 +1577,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* synchronization with a one second timeout.
*/
if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
monarch_timeout < 0)
monarch_timeout = USEC_PER_SEC;
cfg->monarch_timeout < 0)
cfg->monarch_timeout = USEC_PER_SEC;

/*
* There are also broken BIOSes on some Pentium M and
* earlier systems:
*/
if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
mce_bootlog = 0;
if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
cfg->bootlog = 0;

if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
}
if (monarch_timeout < 0)
monarch_timeout = 0;
if (mce_bootlog != 0)
mce_panic_timeout = 30;
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
if (cfg->bootlog != 0)
cfg->panic_timeout = 30;

return 0;
}
@@ -1635,7 +1638,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)

__this_cpu_write(mce_next_interval, iv);

if (mce_ignore_ce || !iv)
if (mca_cfg.ignore_ce || !iv)
return;

t->expires = round_jiffies(jiffies + iv);
@@ -1668,7 +1671,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
*/
void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
{
if (mce_disabled)
if (mca_cfg.disabled)
return;

if (__mcheck_cpu_ancient_init(c))
@@ -1678,7 +1681,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
return;

if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
mce_disabled = 1;
mca_cfg.disabled = true;
return;
}

@@ -1951,6 +1954,8 @@ static struct miscdevice mce_chrdev_device = {
*/
static int __init mcheck_enable(char *str)
{
struct mca_config *cfg = &mca_cfg;

if (*str == 0) {
enable_p5_mce();
return 1;
@@ -1958,22 +1963,22 @@ static int __init mcheck_enable(char *str)
if (*str == '=')
str++;
if (!strcmp(str, "off"))
mce_disabled = 1;
cfg->disabled = true;
else if (!strcmp(str, "no_cmci"))
mce_cmci_disabled = 1;
cfg->cmci_disabled = true;
else if (!strcmp(str, "dont_log_ce"))
mce_dont_log_ce = 1;
cfg->dont_log_ce = true;
else if (!strcmp(str, "ignore_ce"))
mce_ignore_ce = 1;
cfg->ignore_ce = true;
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
mce_bootlog = (str[0] == 'b');
cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
mce_bios_cmci_threshold = 1;
cfg->bios_cmci_threshold = true;
else if (isdigit(str[0])) {
get_option(&str, &tolerant);
get_option(&str, &(cfg->tolerant));
if (*str == ',') {
++str;
get_option(&str, &monarch_timeout);
get_option(&str, &(cfg->monarch_timeout));
}
} else {
pr_info("mce argument %s ignored. Please use /sys\n", str);
@@ -2002,7 +2007,7 @@ static int mce_disable_error_reporting(void)
{
int i;

for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];

if (b->init)
@@ -2142,15 +2147,15 @@ static ssize_t set_ignore_ce(struct device *s,
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;

if (mce_ignore_ce ^ !!new) {
if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
mce_timer_delete_all();
on_each_cpu(mce_disable_cmci, NULL, 1);
mce_ignore_ce = 1;
mca_cfg.ignore_ce = true;
} else {
/* enable ce features */
mce_ignore_ce = 0;
mca_cfg.ignore_ce = false;
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
@@ -2166,14 +2171,14 @@ static ssize_t set_cmci_disabled(struct device *s,
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;

if (mce_cmci_disabled ^ !!new) {
if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
on_each_cpu(mce_disable_cmci, NULL, 1);
mce_cmci_disabled = 1;
mca_cfg.cmci_disabled = true;
} else {
/* enable cmci */
mce_cmci_disabled = 0;
mca_cfg.cmci_disabled = false;
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
@@ -2190,9 +2195,9 @@ static ssize_t store_int_with_restart(struct device *s,
}

static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
static DEVICE_INT_ATTR(tolerant, 0644, tolerant);
static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);

static struct dev_ext_attribute dev_attr_check_interval = {
__ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
@@ -2200,13 +2205,13 @@ static struct dev_ext_attribute dev_attr_check_interval = {
};

static struct dev_ext_attribute dev_attr_ignore_ce = {
__ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce),
&mce_ignore_ce
__ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce),
&mca_cfg.ignore_ce
};

static struct dev_ext_attribute dev_attr_cmci_disabled = {
__ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled),
&mce_cmci_disabled
__ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled),
&mca_cfg.cmci_disabled
};

static struct device_attribute *mce_device_attrs[] = {
@@ -2253,7 +2258,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
if (err)
goto error;
}
for (j = 0; j < banks; j++) {
for (j = 0; j < mca_cfg.banks; j++) {
err = device_create_file(dev, &mce_banks[j].attr);
if (err)
goto error2;
@@ -2285,7 +2290,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
for (i = 0; mce_device_attrs[i]; i++)
device_remove_file(dev, mce_device_attrs[i]);

for (i = 0; i < banks; i++)
for (i = 0; i < mca_cfg.banks; i++)
device_remove_file(dev, &mce_banks[i].attr);

device_unregister(dev);
@@ -2304,7 +2309,7 @@ static void __cpuinit mce_disable_cpu(void *h)

if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];

if (b->init)
@@ -2322,7 +2327,7 @@ static void __cpuinit mce_reenable_cpu(void *h)

if (!(action & CPU_TASKS_FROZEN))
cmci_reenable();
for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];

if (b->init)
@@ -2375,7 +2380,7 @@ static __init void mce_init_banks(void)
{
int i;

for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
struct device_attribute *a = &b->attr;

@@ -2426,7 +2431,7 @@ device_initcall_sync(mcheck_init_device);
*/
static int __init mcheck_disable(char *str)
{
mce_disabled = 1;
mca_cfg.disabled = true;
return 1;
}
__setup("nomce", mcheck_disable);

@@ -6,7 +6,7 @@
*
* Written by Jacob Shin - AMD, Inc.
*
* Support: borislav.petkov@amd.com
* Maintained by: Borislav Petkov <bp@alien8.de>
*
* April 2006
* - added support for AMD Family 0x10 processors

@@ -53,7 +53,7 @@ static int cmci_supported(int *banks)
{
u64 cap;

if (mce_cmci_disabled || mce_ignore_ce)
if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
return 0;

/*
@@ -200,7 +200,7 @@ static void cmci_discover(int banks)
continue;
}

if (!mce_bios_cmci_threshold) {
if (!mca_cfg.bios_cmci_threshold) {
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
val |= CMCI_THRESHOLD;
} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
@@ -227,7 +227,7 @@ static void cmci_discover(int banks)
* set the thresholds properly or does not work with
* this boot option. Note down now and report later.
*/
if (mce_bios_cmci_threshold && bios_zero_thresh &&
if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
bios_wrong_thresh = 1;
} else {
@@ -235,7 +235,7 @@ static void cmci_discover(int banks)
}
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (mce_bios_cmci_threshold && bios_wrong_thresh) {
if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
pr_info_once(
"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
pr_info_once(
@@ -285,34 +285,39 @@ void cmci_clear(void)
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static long cmci_rediscover_work_func(void *arg)
{
int banks;

/* Recheck banks in case CPUs don't all have the same */
if (cmci_supported(&banks))
cmci_discover(banks);

return 0;
}

/*
* After a CPU went down cycle through all the others and rediscover
* Must run in process context.
*/
void cmci_rediscover(int dying)
{
int banks;
int cpu;
cpumask_var_t old;
int cpu, banks;

if (!cmci_supported(&banks))
return;
if (!alloc_cpumask_var(&old, GFP_KERNEL))
return;
cpumask_copy(old, &current->cpus_allowed);

for_each_online_cpu(cpu) {
if (cpu == dying)
continue;
if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
continue;
/* Recheck banks in case CPUs don't all have the same */
if (cmci_supported(&banks))
cmci_discover(banks);
}

set_cpus_allowed_ptr(current, old);
free_cpumask_var(old);
if (cpu == smp_processor_id()) {
cmci_rediscover_work_func(NULL);
continue;
}

work_on_cpu(cpu, cmci_rediscover_work_func, NULL);
}
}

/*

@@ -695,11 +695,16 @@ void mtrr_ap_init(void)
}

/**
* Save current fixed-range MTRR state of the BSP
* Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
*/
void mtrr_save_state(void)
{
smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
int first_cpu;

get_online_cpus();
first_cpu = cpumask_first(cpu_online_mask);
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
put_online_cpus();
}

void set_mtrr_aps_delayed_init(void)

@@ -340,9 +340,6 @@ int x86_setup_perfctr(struct perf_event *event)
/* BTS is currently only allowed for user-mode. */
if (!attr->exclude_kernel)
return -EOPNOTSUPP;

if (!attr->exclude_guest)
return -EOPNOTSUPP;
}

hwc->config |= config;
@@ -385,9 +382,6 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip) {
int precise = 0;

if (!event->attr.exclude_guest)
return -EOPNOTSUPP;

/* Support for constant skid */
if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
precise++;
@@ -1316,6 +1310,121 @@ static struct attribute_group x86_pmu_format_group = {
.attrs = NULL,
};

struct perf_pmu_events_attr {
struct device_attribute attr;
u64 id;
};

/*
* Remove all undefined events (x86_pmu.event_map(id) == 0)
* out of events_attr attributes.
*/
static void __init filter_events(struct attribute **attrs)
{
int i, j;

for (i = 0; attrs[i]; i++) {
if (x86_pmu.event_map(i))
continue;

for (j = i; attrs[j]; j++)
attrs[j] = attrs[j + 1];

/* Check the shifted attr. */
i--;
}
}

static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page)
{
struct perf_pmu_events_attr *pmu_attr = \
container_of(attr, struct perf_pmu_events_attr, attr);

u64 config = x86_pmu.event_map(pmu_attr->id);
return x86_pmu.events_sysfs_show(page, config);
}

#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr

#define EVENT_ATTR(_name, _id) \
static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
.id = PERF_COUNT_HW_##_id, \
};

EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
EVENT_ATTR(cache-references, CACHE_REFERENCES );
EVENT_ATTR(cache-misses, CACHE_MISSES );
EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS );
EVENT_ATTR(branch-misses, BRANCH_MISSES );
EVENT_ATTR(bus-cycles, BUS_CYCLES );
EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND );
EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND );
EVENT_ATTR(ref-cycles, REF_CPU_CYCLES );

static struct attribute *empty_attrs;

static struct attribute *events_attr[] = {
EVENT_PTR(CPU_CYCLES),
EVENT_PTR(INSTRUCTIONS),
EVENT_PTR(CACHE_REFERENCES),
EVENT_PTR(CACHE_MISSES),
EVENT_PTR(BRANCH_INSTRUCTIONS),
EVENT_PTR(BRANCH_MISSES),
EVENT_PTR(BUS_CYCLES),
EVENT_PTR(STALLED_CYCLES_FRONTEND),
EVENT_PTR(STALLED_CYCLES_BACKEND),
EVENT_PTR(REF_CPU_CYCLES),
NULL,
};

static struct attribute_group x86_pmu_events_group = {
.name = "events",
.attrs = events_attr,
};

ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
{
u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE);
bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
bool any = (config & ARCH_PERFMON_EVENTSEL_ANY);
bool inv = (config & ARCH_PERFMON_EVENTSEL_INV);
ssize_t ret;

/*
* We have whole page size to spend and just little data
* to write, so we can safely use sprintf.
*/
ret = sprintf(page, "event=0x%02llx", event);

if (umask)
ret += sprintf(page + ret, ",umask=0x%02llx", umask);

if (edge)
ret += sprintf(page + ret, ",edge");

if (pc)
ret += sprintf(page + ret, ",pc");

if (any)
ret += sprintf(page + ret, ",any");

if (inv)
ret += sprintf(page + ret, ",inv");

if (cmask)
ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);

ret += sprintf(page + ret, "\n");

return ret;
}

static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
@@ -1362,6 +1471,11 @@ static int __init init_hw_perf_events(void)
x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
x86_pmu_format_group.attrs = x86_pmu.format_attrs;

if (!x86_pmu.events_sysfs_show)
x86_pmu_events_group.attrs = &empty_attrs;
else
filter_events(x86_pmu_events_group.attrs);

pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1651,6 +1765,7 @@ static struct attribute_group x86_pmu_attr_group = {
static const struct attribute_group *x86_pmu_attr_groups[] = {
&x86_pmu_attr_group,
&x86_pmu_format_group,
&x86_pmu_events_group,
NULL,
};

@@ -354,6 +354,8 @@ struct x86_pmu {
int attr_rdpmc;
struct attribute **format_attrs;

ssize_t (*events_sysfs_show)(char *page, u64 config);

/*
* CPU Hotplug hooks
*/
@@ -536,6 +538,9 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
regs->ip = ip;
}

ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
ssize_t intel_event_sysfs_show(char *page, u64 config);

#ifdef CONFIG_CPU_SUP_AMD

int amd_pmu_init(void);

@@ -568,6 +568,14 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
}
}

static ssize_t amd_event_sysfs_show(char *page, u64 config)
{
u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
(config & AMD64_EVENTSEL_EVENT) >> 24;

return x86_event_sysfs_show(page, config, event);
}

static __initconst const struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = x86_pmu_handle_irq,
@@ -591,6 +599,7 @@ static __initconst const struct x86_pmu amd_pmu = {
.put_event_constraints = amd_put_event_constraints,

.format_attrs = amd_format_attr,
.events_sysfs_show = amd_event_sysfs_show,

.cpu_prepare = amd_pmu_cpu_prepare,
.cpu_starting = amd_pmu_cpu_starting,

@@ -1603,6 +1603,13 @@ static struct attribute *intel_arch_formats_attr[] = {
NULL,
};

ssize_t intel_event_sysfs_show(char *page, u64 config)
{
u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);

return x86_event_sysfs_show(page, config, event);
}

static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
@@ -1628,6 +1635,7 @@ static __initconst const struct x86_pmu core_pmu = {
.event_constraints = intel_core_event_constraints,
.guest_get_msrs = core_guest_get_msrs,
.format_attrs = intel_arch_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
};

struct intel_shared_regs *allocate_shared_regs(int cpu)
@@ -1766,6 +1774,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.pebs_aliases = intel_pebs_aliases_core2,

.format_attrs = intel_arch3_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,

.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,

@@ -2500,7 +2500,7 @@ static bool pcidrv_registered;
/*
* add a pci uncore device
*/
static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
{
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
@@ -2571,8 +2571,8 @@ static void uncore_pci_remove(struct pci_dev *pdev)
kfree(box);
}

static int __devinit uncore_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
static int uncore_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct intel_uncore_type *type;

@@ -227,6 +227,8 @@ static __initconst const struct x86_pmu p6_pmu = {
.event_constraints = p6_event_constraints,

.format_attrs = intel_p6_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,

};

__init int p6_pmu_init(void)

@@ -26,11 +26,6 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
#ifdef CONFIG_X86_32
static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
{
/*
* We use exception 16 if we have hardware math and we've either seen
* it or the CPU claims it is internal
*/
int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
seq_printf(m,
"fdiv_bug\t: %s\n"
"hlt_bug\t\t: %s\n"
@@ -45,7 +40,7 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
c->f00f_bug ? "yes" : "no",
c->coma_bug ? "yes" : "no",
c->hard_math ? "yes" : "no",
fpu_exception ? "yes" : "no",
c->hard_math ? "yes" : "no",
c->cpuid_level,
c->wp_works_ok ? "yes" : "no");
}

@@ -16,6 +16,7 @@
#include <linux/delay.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/module.h>

#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -30,6 +31,27 @@

int in_crash_kexec;

/*
 * This is used to VMCLEAR all VMCSs loaded on the
 * processor. And when loading kvm_intel module, the
 * callback function pointer will be assigned.
 *
 * protected by rcu.
 */
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);

static inline void cpu_crash_vmclear_loaded_vmcss(void)
{
crash_vmclear_fn *do_vmclear_operation = NULL;

rcu_read_lock();
do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
if (do_vmclear_operation)
do_vmclear_operation();
rcu_read_unlock();
}

#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)

static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
#endif
crash_save_cpu(regs, cpu);

/*
 * VMCLEAR VMCSs loaded on all cpus if needed.
 */
cpu_crash_vmclear_loaded_vmcss();

/* Disable VMX or SVM if needed.
 *
 * We need to disable virtualization on all CPUs.
@@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)

kdump_nmi_shootdown_cpus();

/*
 * VMCLEAR VMCSs loaded on this cpu if needed.
 */
cpu_crash_vmclear_loaded_vmcss();

/* Booting kdump kernel with VMX or SVM enabled won't work,
 * because (among other limitations) we can't disable paging
 * with the virt flags.
|
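The crash_vmclear_loaded_vmcss hook introduced above is only the consumer half of the mechanism; per its comment, kvm_intel assigns the pointer when the module loads. A minimal sketch of that publisher side, assuming a module-local callback (the names publish_/retract_ and my_vmclear_all are illustrative, not taken from this commit):

/* Publisher side of an RCU-protected callback pointer. */
extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;

static void my_vmclear_all(void)	/* hypothetical module callback */
{
	/* VMCLEAR every VMCS this CPU has loaded. */
}

static void publish_vmclear_callback(void)
{
	/* Pairs with rcu_dereference() in cpu_crash_vmclear_loaded_vmcss(). */
	rcu_assign_pointer(crash_vmclear_loaded_vmcss, my_vmclear_all);
}

static void retract_vmclear_callback(void)
{
	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
	synchronize_rcu();	/* no CPU may still be running the old callback */
}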
@@ -739,30 +739,11 @@ ENTRY(ptregs_##name) ; \
ENDPROC(ptregs_##name)

PTREGSCALL1(iopl)
PTREGSCALL0(fork)
PTREGSCALL0(vfork)
PTREGSCALL2(sigaltstack)
PTREGSCALL0(sigreturn)
PTREGSCALL0(rt_sigreturn)
PTREGSCALL2(vm86)
PTREGSCALL1(vm86old)

/* Clone is an oddball. The 4th arg is in %edi */
ENTRY(ptregs_clone)
CFI_STARTPROC
leal 4(%esp),%eax
pushl_cfi %eax
pushl_cfi PT_EDI(%eax)
movl PT_EDX(%eax),%ecx
movl PT_ECX(%eax),%edx
movl PT_EBX(%eax),%eax
call sys_clone
addl $8,%esp
CFI_ADJUST_CFA_OFFSET -8
ret
CFI_ENDPROC
ENDPROC(ptregs_clone)

.macro FIXUP_ESPFIX_STACK
/*
 * Switch back for ESPFIX stack to the normal zerobased stack
@@ -1084,7 +1065,6 @@ ENTRY(xen_failsafe_callback)
lea 16(%esp),%esp
CFI_ADJUST_CFA_OFFSET -16
jz 5f
addl $16,%esp
jmp iret_exc
5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
|
@@ -56,7 +56,7 @@
#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/asm.h>
#include <asm/rcu.h>
#include <asm/context_tracking.h>
#include <asm/smap.h>
#include <linux/err.h>

@@ -845,10 +845,25 @@ ENTRY(\label)
END(\label)
.endm

PTREGSCALL stub_clone, sys_clone, %r8
PTREGSCALL stub_fork, sys_fork, %rdi
PTREGSCALL stub_vfork, sys_vfork, %rdi
PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
.macro FORK_LIKE func
ENTRY(stub_\func)
CFI_STARTPROC
popq %r11 /* save return address */
PARTIAL_FRAME 0
SAVE_REST
pushq %r11 /* put it back on stack */
FIXUP_TOP_OF_STACK %r11, 8
DEFAULT_FRAME 0 8 /* offset 8: return address */
call sys_\func
RESTORE_TOP_OF_STACK %r11, 8
ret $REST_SKIP /* pop extended registers */
CFI_ENDPROC
END(stub_\func)
.endm

FORK_LIKE clone
FORK_LIKE fork
FORK_LIKE vfork
PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
@@ -897,8 +912,6 @@ ENTRY(stub_rt_sigreturn)
END(stub_rt_sigreturn)

#ifdef CONFIG_X86_X32_ABI
PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx

ENTRY(stub_x32_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
@@ -995,8 +1008,8 @@ END(interrupt)
 */
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
ASM_CLAC
XCPT_FRAME
ASM_CLAC
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
/* 0(%rsp): old_rsp-ARGOFFSET */
@@ -1135,8 +1148,8 @@ END(common_interrupt)
 */
.macro apicinterrupt num sym do_sym
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
ASM_CLAC
pushq_cfi $~(\num)
.Lcommon_\sym:
interrupt \do_sym
@@ -1190,8 +1203,8 @@ apicinterrupt IRQ_WORK_VECTOR \
 */
.macro zeroentry sym do_sym
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
@@ -1208,8 +1221,8 @@ END(\sym)

.macro paranoidzeroentry sym do_sym
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
@@ -1227,8 +1240,8 @@ END(\sym)
#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
.macro paranoidzeroentry_ist sym do_sym ist
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
@@ -1247,8 +1260,8 @@ END(\sym)

.macro errorentry sym do_sym
ENTRY(\sym)
ASM_CLAC
XCPT_FRAME
ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@@ -1266,8 +1279,8 @@ END(\sym)
/* error code is on the stack already */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
ASM_CLAC
XCPT_FRAME
ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@@ -1699,9 +1712,10 @@ nested_nmi:

1:
/* Set up the interrupted NMIs stack to jump to repeat_nmi */
leaq -6*8(%rsp), %rdx
leaq -1*8(%rsp), %rdx
movq %rdx, %rsp
CFI_ADJUST_CFA_OFFSET 6*8
CFI_ADJUST_CFA_OFFSET 1*8
leaq -10*8(%rsp), %rdx
pushq_cfi $__KERNEL_DS
pushq_cfi %rdx
pushfq_cfi
@@ -1709,8 +1723,8 @@ nested_nmi:
pushq_cfi $repeat_nmi

/* Put stack back */
addq $(11*8), %rsp
CFI_ADJUST_CFA_OFFSET -11*8
addq $(6*8), %rsp
CFI_ADJUST_CFA_OFFSET -6*8

nested_nmi_out:
popq_cfi %rdx
@@ -1736,18 +1750,18 @@ first_nmi:
 * +-------------------------+
 * | NMI executing variable |
 * +-------------------------+
 * | Saved SS |
 * | Saved Return RSP |
 * | Saved RFLAGS |
 * | Saved CS |
 * | Saved RIP |
 * +-------------------------+
 * | copied SS |
 * | copied Return RSP |
 * | copied RFLAGS |
 * | copied CS |
 * | copied RIP |
 * +-------------------------+
 * | Saved SS |
 * | Saved Return RSP |
 * | Saved RFLAGS |
 * | Saved CS |
 * | Saved RIP |
 * +-------------------------+
 * | pt_regs |
 * +-------------------------+
 *
@@ -1763,9 +1777,14 @@ first_nmi:
/* Set the NMI executing variable on the stack. */
pushq_cfi $1

/*
 * Leave room for the "copied" frame
 */
subq $(5*8), %rsp

/* Copy the stack frame to the Saved frame */
.rept 5
pushq_cfi 6*8(%rsp)
pushq_cfi 11*8(%rsp)
.endr
CFI_DEF_CFA_OFFSET SS+8-RIP

@@ -1786,12 +1805,15 @@ repeat_nmi:
 * is benign for the non-repeat case, where 1 was pushed just above
 * to this very stack slot).
 */
movq $1, 5*8(%rsp)
movq $1, 10*8(%rsp)

/* Make another copy, this one may be modified by nested NMIs */
addq $(10*8), %rsp
CFI_ADJUST_CFA_OFFSET -10*8
.rept 5
pushq_cfi 4*8(%rsp)
pushq_cfi -6*8(%rsp)
.endr
subq $(5*8), %rsp
CFI_DEF_CFA_OFFSET SS+8-RIP
end_repeat_nmi:

@@ -1842,8 +1864,12 @@ nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
RESTORE_ALL 8

/* Pop the extra iret frame */
addq $(5*8), %rsp

/* Clear the NMI executing stack variable */
movq $0, 10*8(%rsp)
movq $0, 5*8(%rsp)
jmp irq_return
CFI_ENDPROC
END(nmi)
|
@@ -266,6 +266,19 @@ num_subarch_entries = (. - subarch_entries) / 4
jmp default_entry
#endif /* CONFIG_PARAVIRT */

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
 * up already except stack. We just set up stack here. Then call
 * start_secondary().
 */
ENTRY(start_cpu0)
movl stack_start, %ecx
movl %ecx, %esp
jmp *(initial_code)
ENDPROC(start_cpu0)
#endif

/*
 * Non-boot CPU entry point; entered from trampoline.S
 * We can't lgdt here, because lgdt itself uses a data segment, but
@@ -292,8 +305,8 @@ default_entry:
 * be using the global pages.
 *
 * NOTE! If we are on a 486 we may have no cr4 at all!
 * Specifically, cr4 exists if and only if CPUID exists,
 * which in turn exists if and only if EFLAGS.ID exists.
 * Specifically, cr4 exists if and only if CPUID exists
 * and has flags other than the FPU flag set.
 */
movl $X86_EFLAGS_ID,%ecx
pushl %ecx
@@ -308,6 +321,11 @@ default_entry:
testl %ecx,%eax
jz 6f # No ID flag = no CPUID = no CR4

movl $1,%eax
cpuid
andl $~1,%edx # Ignore CPUID.FPU
jz 6f # No flags or only CPUID.FPU = no CR4

movl pa(mmu_cr4_features),%eax
movl %eax,%cr4

|
@@ -252,6 +252,22 @@ ENTRY(secondary_startup_64)
pushq %rax # target address in negative space
lretq

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
 * up already except stack. We just set up stack here. Then call
 * start_secondary().
 */
ENTRY(start_cpu0)
movq stack_start(%rip),%rsp
movq initial_code(%rip),%rax
pushq $0 # fake return address to stop unwinder
pushq $__KERNEL_CS # set correct cs
pushq %rax # target address in negative space
lretq
ENDPROC(start_cpu0)
#endif

/* SMP bootup changes these two */
__REFDATA
.align 8
|
@@ -434,7 +434,7 @@ void hpet_msi_unmask(struct irq_data *data)

/* unmask it */
cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
cfg |= HPET_TN_FSB;
cfg |= HPET_TN_ENABLE | HPET_TN_FSB;
hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
}

@@ -445,7 +445,7 @@ void hpet_msi_mask(struct irq_data *data)

/* mask it */
cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
cfg &= ~HPET_TN_FSB;
cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB);
hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
}

|
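The two hpet hunks make mask and unmask toggle the timer's interrupt-enable bit together with FSB delivery mode in one read-modify-write, so masking actually silences the timer rather than only dropping it out of FSB mode. A standalone sketch of the bit manipulation (bit positions as given in the HPET spec; the config register is simulated with a variable):

#include <stdint.h>

#define TN_ENABLE	0x0004	/* Tn_INT_ENB_CNF, bit 2 */
#define TN_FSB		0x4000	/* Tn_FSB_EN_CNF, bit 14 */

static uint32_t tn_cfg;		/* stands in for HPET_Tn_CFG(num) */

static void msi_unmask_sketch(void)
{
	tn_cfg |= TN_ENABLE | TN_FSB;	/* enable and select FSB in one write */
}

static void msi_mask_sketch(void)
{
	tn_cfg &= ~(TN_ENABLE | TN_FSB);	/* clear both together */
}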
@@ -175,7 +175,11 @@ void __cpuinit fpu_init(void)
cr0 |= X86_CR0_EM;
write_cr0(cr0);

if (!smp_processor_id())
/*
 * init_thread_xstate is only called once to avoid overriding
 * xstate_size during boot time or during CPU hotplug.
 */
if (xstate_size == 0)
init_thread_xstate();

mxcsr_feature_mask_init();
|
@@ -42,39 +42,6 @@
 * (these are usually mapped into the 0x30-0xff vector range)
 */

#ifdef CONFIG_X86_32
/*
 * Note that on a 486, we don't want to do a SIGFPE on an irq13
 * as the irq is unreliable, and exception 16 works correctly
 * (ie as explained in the intel literature). On a 386, you
 * can't use exception 16 due to bad IBM design, so we have to
 * rely on the less exact irq13.
 *
 * Careful.. Not only is IRQ13 unreliable, but it also
 * leads to races. IBM designers who came up with it should
 * be shot.
 */

static irqreturn_t math_error_irq(int cpl, void *dev_id)
{
outb(0, 0xF0);
if (ignore_fpu_irq || !boot_cpu_data.hard_math)
return IRQ_NONE;
math_error(get_irq_regs(), 0, X86_TRAP_MF);
return IRQ_HANDLED;
}

/*
 * New motherboards sometimes make IRQ 13 be a PCI interrupt,
 * so allow interrupt sharing.
 */
static struct irqaction fpu_irq = {
.handler = math_error_irq,
.name = "fpu",
.flags = IRQF_NO_THREAD,
};
#endif

/*
 * IRQ2 is cascade interrupt to second interrupt controller
 */
@@ -242,13 +209,6 @@ void __init native_init_IRQ(void)
setup_irq(2, &irq2);

#ifdef CONFIG_X86_32
/*
 * External FPU? Set up irq13 if so, for
 * original braindamaged IBM FERR coupling.
 */
if (boot_cpu_data.hard_math && !cpu_has_fpu)
setup_irq(FPU_IRQ, &fpu_irq);

irq_ctx_init(smp_processor_id());
#endif
}
|
@@ -42,6 +42,8 @@
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/kvm_guest.h>
#include <asm/context_tracking.h>

static int kvmapf = 1;

@@ -62,6 +64,15 @@ static int parse_no_stealacc(char *arg)

early_param("no-steal-acc", parse_no_stealacc);

static int kvmclock_vsyscall = 1;
static int parse_no_kvmclock_vsyscall(char *arg)
{
kvmclock_vsyscall = 0;
return 0;
}

early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);

static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
@@ -110,11 +121,8 @@ void kvm_async_pf_task_wait(u32 token)
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
struct kvm_task_sleep_node n, *e;
DEFINE_WAIT(wait);
int cpu, idle;

cpu = get_cpu();
idle = idle_cpu(cpu);
put_cpu();
rcu_irq_enter();

spin_lock(&b->lock);
e = _find_apf_task(b, token);
@@ -123,12 +131,14 @@ void kvm_async_pf_task_wait(u32 token)
hlist_del(&e->link);
kfree(e);
spin_unlock(&b->lock);

rcu_irq_exit();
return;
}

n.token = token;
n.cpu = smp_processor_id();
n.halted = idle || preempt_count() > 1;
n.halted = is_idle_task(current) || preempt_count() > 1;
init_waitqueue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
spin_unlock(&b->lock);
@@ -147,13 +157,16 @@ void kvm_async_pf_task_wait(u32 token)
/*
 * We cannot reschedule. So halt.
 */
rcu_irq_exit();
native_safe_halt();
rcu_irq_enter();
local_irq_disable();
}
}
if (!n.halted)
finish_wait(&n.wq, &wait);

rcu_irq_exit();
return;
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
@@ -247,10 +260,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
rcu_irq_enter();
exception_enter(regs);
exit_idle();
kvm_async_pf_task_wait((u32)read_cr2());
rcu_irq_exit();
exception_exit(regs);
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
@@ -471,6 +484,9 @@ void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);

if (kvmclock_vsyscall)
kvm_setup_vsyscall_timeinfo();

#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
register_cpu_notifier(&kvm_cpu_notifier);
|
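parse_no_kvmclock_vsyscall() above follows the usual early_param() pattern: the handler runs while the kernel parses its command line, well before initcalls, so kvm_guest_init() can already act on the flag. The same pattern with a made-up knob name, as a sketch:

static int example_feature = 1;

static int __init parse_no_example_feature(char *arg)
{
	example_feature = 0;	/* "no-example-feature" on the command line */
	return 0;
}
early_param("no-example-feature", parse_no_example_feature);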
@@ -23,6 +23,7 @@
#include <asm/apic.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/memblock.h>

#include <asm/x86_init.h>
#include <asm/reboot.h>
@@ -39,7 +40,7 @@ static int parse_no_kvmclock(char *arg)
early_param("no-kvmclock", parse_no_kvmclock);

/* The hypervisor will put information about time periodically here */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
static struct pvclock_vsyscall_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock;

/*
@@ -52,15 +53,20 @@ static unsigned long kvm_get_wallclock(void)
struct pvclock_vcpu_time_info *vcpu_time;
struct timespec ts;
int low, high;
int cpu;

low = (int)__pa_symbol(&wall_clock);
high = ((u64)__pa_symbol(&wall_clock) >> 32);

native_write_msr(msr_kvm_wall_clock, low, high);

vcpu_time = &get_cpu_var(hv_clock);
preempt_disable();
cpu = smp_processor_id();

vcpu_time = &hv_clock[cpu].pvti;
pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
put_cpu_var(hv_clock);

preempt_enable();

return ts.tv_sec;
}
@@ -74,9 +80,11 @@ static cycle_t kvm_clock_read(void)
{
struct pvclock_vcpu_time_info *src;
cycle_t ret;
int cpu;

preempt_disable_notrace();
src = &__get_cpu_var(hv_clock);
cpu = smp_processor_id();
src = &hv_clock[cpu].pvti;
ret = pvclock_clocksource_read(src);
preempt_enable_notrace();
return ret;
@@ -99,8 +107,15 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
static unsigned long kvm_get_tsc_khz(void)
{
struct pvclock_vcpu_time_info *src;
src = &per_cpu(hv_clock, 0);
return pvclock_tsc_khz(src);
int cpu;
unsigned long tsc_khz;

preempt_disable();
cpu = smp_processor_id();
src = &hv_clock[cpu].pvti;
tsc_khz = pvclock_tsc_khz(src);
preempt_enable();
return tsc_khz;
}

static void kvm_get_preset_lpj(void)
@@ -119,10 +134,14 @@ bool kvm_check_and_clear_guest_paused(void)
{
bool ret = false;
struct pvclock_vcpu_time_info *src;
int cpu = smp_processor_id();

src = &__get_cpu_var(hv_clock);
if (!hv_clock)
return ret;

src = &hv_clock[cpu].pvti;
if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
__this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
src->flags &= ~PVCLOCK_GUEST_STOPPED;
ret = true;
}

@@ -141,9 +160,10 @@ int kvm_register_clock(char *txt)
{
int cpu = smp_processor_id();
int low, high, ret;
struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;

low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
low = (int)__pa(src) | 1;
high = ((u64)__pa(src) >> 32);
ret = native_write_msr_safe(msr_kvm_system_time, low, high);
printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
cpu, high, low, txt);
@@ -197,6 +217,8 @@ static void kvm_shutdown(void)

void __init kvmclock_init(void)
{
unsigned long mem;

if (!kvm_para_available())
return;

@@ -209,8 +231,18 @@ void __init kvmclock_init(void)
printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
msr_kvm_system_time, msr_kvm_wall_clock);

if (kvm_register_clock("boot clock"))
mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS,
PAGE_SIZE);
if (!mem)
return;
hv_clock = __va(mem);

if (kvm_register_clock("boot clock")) {
hv_clock = NULL;
memblock_free(mem,
sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
return;
}
pv_time_ops.sched_clock = kvm_clock_read;
x86_platform.calibrate_tsc = kvm_get_tsc_khz;
x86_platform.get_wallclock = kvm_get_wallclock;
@@ -233,3 +265,37 @@ void __init kvmclock_init(void)
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
}

int __init kvm_setup_vsyscall_timeinfo(void)
{
#ifdef CONFIG_X86_64
int cpu;
int ret;
u8 flags;
struct pvclock_vcpu_time_info *vcpu_time;
unsigned int size;

size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS;

preempt_disable();
cpu = smp_processor_id();

vcpu_time = &hv_clock[cpu].pvti;
flags = pvclock_read_flags(vcpu_time);

if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
preempt_enable();
return 1;
}

if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
preempt_enable();
return ret;
}

preempt_enable();

kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
#endif
return 0;
}
|
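Every converted reader above repeats one idiom: hv_clock is now a flat NR_CPUS-sized array instead of a per-cpu variable, so a reader pins itself with preempt_disable(), indexes by smp_processor_id(), and unpins when done. A hedged distillation of that idiom (kvm_clock_read() itself uses the _notrace variants):

static cycle_t this_cpu_clock_read_sketch(void)
{
	cycle_t ret;
	int cpu;

	preempt_disable();	/* keep smp_processor_id() stable */
	cpu = smp_processor_id();
	ret = pvclock_clocksource_read(&hv_clock[cpu].pvti);
	preempt_enable();

	return ret;
}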
@@ -8,8 +8,8 @@
 * Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
 *
 * Maintainers:
 * Andreas Herrmann <andreas.herrmann3@amd.com>
 * Borislav Petkov <borislav.petkov@amd.com>
 * Andreas Herrmann <herrmann.der.user@googlemail.com>
 * Borislav Petkov <bp@alien8.de>
 *
 * This driver allows to upgrade microcode on F10h AMD
 * CPUs and later.
@@ -190,6 +190,7 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size,
#define F1XH_MPB_MAX_SIZE 2048
#define F14H_MPB_MAX_SIZE 1824
#define F15H_MPB_MAX_SIZE 4096
#define F16H_MPB_MAX_SIZE 3458

switch (c->x86) {
case 0x14:
@@ -198,6 +199,9 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size,
case 0x15:
max_size = F15H_MPB_MAX_SIZE;
break;
case 0x16:
max_size = F16H_MPB_MAX_SIZE;
break;
default:
max_size = F1XH_MPB_MAX_SIZE;
break;
|
@@ -265,7 +265,7 @@ rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */

static __devinit void via_no_dac(struct pci_dev *dev)
static void via_no_dac(struct pci_dev *dev)
{
if (forbid_dac == 0) {
dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
|
@@ -262,36 +262,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
propagate_user_return_notify(prev_p, next_p);
}

int sys_fork(struct pt_regs *regs)
{
return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
int sys_vfork(struct pt_regs *regs)
{
return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
NULL, NULL);
}

long
sys_clone(unsigned long clone_flags, unsigned long newsp,
void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
if (!newsp)
newsp = regs->sp;
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * Idle related variables and functions
 */
@@ -306,11 +276,6 @@ void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
#endif

static inline int hlt_use_halt(void)
{
return 1;
}

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
@@ -410,28 +375,22 @@ void cpu_idle(void)
 */
void default_idle(void)
{
if (hlt_use_halt()) {
trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
trace_cpu_idle_rcuidle(1, smp_processor_id());
current_thread_info()->status &= ~TS_POLLING;
/*
 * TS_POLLING-cleared state must be visible before we
 * test NEED_RESCHED:
 */
smp_mb();
trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
trace_cpu_idle_rcuidle(1, smp_processor_id());
current_thread_info()->status &= ~TS_POLLING;
/*
 * TS_POLLING-cleared state must be visible before we
 * test NEED_RESCHED:
 */
smp_mb();

if (!need_resched())
safe_halt(); /* enables interrupts racelessly */
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
trace_power_end_rcuidle(smp_processor_id());
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
} else {
if (!need_resched())
safe_halt(); /* enables interrupts racelessly */
else
local_irq_enable();
/* loop is done by the caller */
cpu_relax();
}
current_thread_info()->status |= TS_POLLING;
trace_power_end_rcuidle(smp_processor_id());
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
|
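The deleted sys_fork/sys_vfork/sys_clone bodies are not lost functionality: this series moves them to generic code, with the pt_regs parameter replaced by current_pt_regs() (the same substitution visible in the copy_thread() hunks below). A hedged sketch of the direction the generic wrapper takes, not a verbatim copy of it:

/* Sketch: a register-free fork wrapper; the child's register state is
 * recovered from the current task instead of being passed down. */
SYSCALL_DEFINE0(fork)
{
	return do_fork(SIGCHLD, 0, current_pt_regs(), 0, NULL, NULL);
}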
@@ -128,8 +128,7 @@ void release_thread(struct task_struct *dead_task)
}

int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
unsigned long arg, struct task_struct *p)
{
struct pt_regs *childregs = task_pt_regs(p);
struct task_struct *tsk;
@@ -138,7 +137,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.sp = (unsigned long) childregs;
p->thread.sp0 = (unsigned long) (childregs+1);

if (unlikely(!regs)) {
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
p->thread.ip = (unsigned long) ret_from_kernel_thread;
@@ -156,12 +155,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
return 0;
}
*childregs = *regs;
*childregs = *current_pt_regs();
childregs->ax = 0;
childregs->sp = sp;
if (sp)
childregs->sp = sp;

p->thread.ip = (unsigned long) ret_from_fork;
task_user_gs(p) = get_user_gs(regs);
task_user_gs(p) = get_user_gs(current_pt_regs());

p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
|
@@ -146,8 +146,7 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
}

int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
unsigned long arg, struct task_struct *p)
{
int err;
struct pt_regs *childregs;
@@ -169,7 +168,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
savesegment(ds, p->thread.ds);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

if (unlikely(!regs)) {
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->sp = (unsigned long)childregs;
@@ -181,10 +180,11 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
return 0;
}
*childregs = *regs;
*childregs = *current_pt_regs();

childregs->ax = 0;
childregs->sp = sp;
if (sp)
childregs->sp = sp;

err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
|
@@ -22,6 +22,8 @@
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/rcupdate.h>
#include <linux/module.h>
#include <linux/context_tracking.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -166,6 +168,35 @@ static inline bool invalid_selector(u16 value)

#define FLAG_MASK FLAG_MASK_32

/*
 * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
 * when it traps. The previous stack will be directly underneath the saved
 * registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
 *
 * Now, if the stack is empty, '&regs->sp' is out of range. In this
 * case we try to take the previous stack. To always return a non-null
 * stack pointer we fall back to regs as stack if no previous stack
 * exists.
 *
 * This is valid only for kernel mode traps.
 */
unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
unsigned long sp = (unsigned long)&regs->sp;
struct thread_info *tinfo;

if (context == (sp & ~(THREAD_SIZE - 1)))
return sp;

tinfo = (struct thread_info *)context;
if (tinfo->previous_esp)
return tinfo->previous_esp;

return (unsigned long)regs;
}
EXPORT_SYMBOL_GPL(kernel_stack_pointer);

static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
@@ -1461,7 +1492,7 @@ long syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;

rcu_user_exit();
user_exit();

/*
 * If we stepped into a sysenter/syscall insn, it trapped in
@@ -1511,6 +1542,13 @@ void syscall_trace_leave(struct pt_regs *regs)
{
bool step;

/*
 * We may come here right after calling schedule_user()
 * or do_notify_resume(), in which case we can be in RCU
 * user mode.
 */
user_exit();

audit_syscall_exit(regs);

if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
@@ -1527,5 +1565,5 @@ void syscall_trace_leave(struct pt_regs *regs)
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);

rcu_user_enter();
user_enter();
}
|
@@ -17,23 +17,13 @@

#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/bootmem.h>
#include <asm/fixmap.h>
#include <asm/pvclock.h>

/*
 * These are periodically updated
 * xen: magic shared_info page
 * kvm: gpa registered via msr
 * and then copied here.
 */
struct pvclock_shadow_time {
u64 tsc_timestamp; /* TSC at last update of time vals. */
u64 system_timestamp; /* Time, in nanosecs, since boot. */
u32 tsc_to_nsec_mul;
int tsc_shift;
u32 version;
u8 flags;
};

static u8 valid_flags __read_mostly = 0;

void pvclock_set_flags(u8 flags)
@@ -41,34 +31,6 @@ void pvclock_set_flags(u8 flags)
valid_flags = flags;
}

static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
{
u64 delta = native_read_tsc() - shadow->tsc_timestamp;
return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
shadow->tsc_shift);
}

/*
 * Reads a consistent set of time-base values from hypervisor,
 * into a shadow data area.
 */
static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
struct pvclock_vcpu_time_info *src)
{
do {
dst->version = src->version;
rmb(); /* fetch version before data */
dst->tsc_timestamp = src->tsc_timestamp;
dst->system_timestamp = src->system_time;
dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
dst->tsc_shift = src->tsc_shift;
dst->flags = src->flags;
rmb(); /* test version after fetching data */
} while ((src->version & 1) || (dst->version != src->version));

return dst->version;
}

unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
{
u64 pv_tsc_khz = 1000000ULL << 32;
@@ -88,23 +50,32 @@ void pvclock_resume(void)
atomic64_set(&last_value, 0);
}

cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
{
struct pvclock_shadow_time shadow;
unsigned version;
cycle_t ret, offset;
u64 last;
cycle_t ret;
u8 flags;

do {
version = pvclock_get_time_values(&shadow, src);
barrier();
offset = pvclock_get_nsec_offset(&shadow);
ret = shadow.system_timestamp + offset;
barrier();
} while (version != src->version);
version = __pvclock_read_cycles(src, &ret, &flags);
} while ((src->version & 1) || version != src->version);

return flags & valid_flags;
}

cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
unsigned version;
cycle_t ret;
u64 last;
u8 flags;

do {
version = __pvclock_read_cycles(src, &ret, &flags);
} while ((src->version & 1) || version != src->version);

if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
(shadow.flags & PVCLOCK_TSC_STABLE_BIT))
(flags & PVCLOCK_TSC_STABLE_BIT))
return ret;

/*
@@ -156,3 +127,71 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,

set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}

static struct pvclock_vsyscall_time_info *pvclock_vdso_info;

static struct pvclock_vsyscall_time_info *
pvclock_get_vsyscall_user_time_info(int cpu)
{
if (!pvclock_vdso_info) {
BUG();
return NULL;
}

return &pvclock_vdso_info[cpu];
}

struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
{
return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
}

#ifdef CONFIG_X86_64
static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
void *v)
{
struct task_migration_notifier *mn = v;
struct pvclock_vsyscall_time_info *pvti;

pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);

/* this is NULL when pvclock vsyscall is not initialized */
if (unlikely(pvti == NULL))
return NOTIFY_DONE;

pvti->migrate_count++;

return NOTIFY_DONE;
}

static struct notifier_block pvclock_migrate = {
.notifier_call = pvclock_task_migrate,
};

/*
 * Initialize the generic pvclock vsyscall state. This will allocate
 * a/some page(s) for the per-vcpu pvclock information, set up a
 * fixmap mapping for the page(s)
 */

int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
int size)
{
int idx;

WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);

pvclock_vdso_info = i;

for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
__pa_symbol(i) + (idx*PAGE_SIZE),
PAGE_KERNEL_VVAR);
}


register_task_migration_notifier(&pvclock_migrate);

return 0;
}
#endif
|
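Both surviving read paths spin on the seqcount-style protocol that __pvclock_read_cycles() implements: an odd version means the hypervisor is mid-update, and a version change across the read means the snapshot is torn and must be retried. A compilable, userspace-flavoured sketch of that handshake (struct and function names are ours, and the real code also folds in a scaled TSC delta):

#include <stdint.h>

struct time_info_sketch {
	volatile uint32_t version;	/* odd while the writer updates */
	uint64_t system_time;
};

static void rmb_sketch(void)
{
	__atomic_thread_fence(__ATOMIC_ACQUIRE);	/* read barrier */
}

static uint64_t read_consistent(const struct time_info_sketch *src)
{
	uint32_t version;
	uint64_t ret;

	do {
		version = src->version;
		rmb_sketch();		/* fetch version before data */
		ret = src->system_time;
		rmb_sketch();		/* fetch data before re-check */
	} while ((src->version & 1) || version != src->version);

	return ret;
}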
@@ -8,7 +8,7 @@

#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)

static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
static void quirk_intel_irqbalance(struct pci_dev *dev)
{
u8 config;
u16 word;
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,

#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
/* Set correct numa_node information for AMD NB functions */
static void __devinit quirk_amd_nb_node(struct pci_dev *dev)
static void quirk_amd_nb_node(struct pci_dev *dev)
{
struct pci_dev *nb_ht;
unsigned int devfn;
|
@@ -195,12 +195,6 @@ void read_persistent_clock(struct timespec *ts)
ts->tv_nsec = 0;
}

unsigned long long native_read_tsc(void)
{
return __native_read_tsc();
}
EXPORT_SYMBOL(native_read_tsc);


static struct resource rtc_resources[] = {
[0] = {
|
@@ -143,11 +143,7 @@ int default_check_phys_apicid_present(int phys_apicid)
}
#endif

#ifndef CONFIG_DEBUG_BOOT_PARAMS
struct boot_params __initdata boot_params;
#else
struct boot_params boot_params;
#endif

/*
 * Machine setup..
@@ -614,6 +610,83 @@ static __init void reserve_ibft_region(void)

static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;

static bool __init snb_gfx_workaround_needed(void)
{
#ifdef CONFIG_PCI
int i;
u16 vendor, devid;
static const __initconst u16 snb_ids[] = {
0x0102,
0x0112,
0x0122,
0x0106,
0x0116,
0x0126,
0x010a,
};

/* Assume no if something weird is going on with PCI */
if (!early_pci_allowed())
return false;

vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID);
if (vendor != 0x8086)
return false;

devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID);
for (i = 0; i < ARRAY_SIZE(snb_ids); i++)
if (devid == snb_ids[i])
return true;
#endif

return false;
}

/*
 * Sandy Bridge graphics has trouble with certain ranges, exclude
 * them from allocation.
 */
static void __init trim_snb_memory(void)
{
static const __initconst unsigned long bad_pages[] = {
0x20050000,
0x20110000,
0x20130000,
0x20138000,
0x40004000,
};
int i;

if (!snb_gfx_workaround_needed())
return;

printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n");

/*
 * Reserve all memory below the 1 MB mark that has not
 * already been reserved.
 */
memblock_reserve(0, 1<<20);

for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
if (memblock_reserve(bad_pages[i], PAGE_SIZE))
printk(KERN_WARNING "failed to reserve 0x%08lx\n",
bad_pages[i]);
}
}

/*
 * Here we put platform-specific memory range workarounds, i.e.
 * memory known to be corrupt or otherwise in need to be reserved on
 * specific platforms.
 *
 * If this gets used more widely it could use a real dispatch mechanism.
 */
static void __init trim_platform_memory_ranges(void)
{
trim_snb_memory();
}

static void __init trim_bios_range(void)
{
/*
@@ -634,6 +707,7 @@ static void __init trim_bios_range(void)
 * take them out.
 */
e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);

sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}

@@ -912,6 +986,8 @@ void __init setup_arch(char **cmdline_p)

setup_real_mode();

trim_platform_memory_ranges();

init_gbpages();

/* max_pfn_mapped is updated here */
@@ -956,6 +1032,10 @@ void __init setup_arch(char **cmdline_p)

reserve_initrd();

#if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD)
acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
#endif

reserve_crashkernel();

vsmp_init();
|
@@ -22,6 +22,7 @@
#include <linux/uaccess.h>
#include <linux/user-return-notifier.h>
#include <linux/uprobes.h>
#include <linux/context_tracking.h>

#include <asm/processor.h>
#include <asm/ucontext.h>
@@ -363,10 +364,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);

/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -413,7 +411,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct rt_sigframe __user *frame;
void __user *fp = NULL;
int err = 0;
struct task_struct *me = current;

frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp);

@@ -432,10 +429,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);

/* Set up to return from userspace. If provided, use a stub
 already in userspace. */
@@ -502,10 +496,7 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);

if (ka->sa.sa_flags & SA_RESTORER) {
@@ -602,13 +593,6 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
}
#endif /* CONFIG_X86_32 */

long
sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct pt_regs *regs)
{
return do_sigaltstack(uss, uoss, regs->sp);
}

/*
 * Do a signal return; undo the signal stack.
 */
@@ -658,7 +642,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;

if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;

return ax;
@@ -816,7 +800,7 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
rcu_user_exit();
user_exit();

#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
@@ -838,7 +822,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();

rcu_user_enter();
user_enter();
}

void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
@@ -864,7 +848,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
struct rt_sigframe_x32 __user *frame;
sigset_t set;
unsigned long ax;
struct pt_regs tregs;

frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);

@@ -878,8 +861,7 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;

tregs = *regs;
if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;

return ax;
|
@@ -68,6 +68,8 @@
#include <asm/mwait.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
@@ -125,8 +127,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
atomic_t init_deasserted;

/*
 * Report back to the Boot Processor.
 * Running on AP.
 * Report back to the Boot Processor during boot time or to the caller processor
 * during CPU online.
 */
static void __cpuinit smp_callin(void)
{
@@ -138,15 +140,17 @@ static void __cpuinit smp_callin(void)
 * we may get here before an INIT-deassert IPI reaches
 * our local APIC. We have to wait for the IPI or we'll
 * lock up on an APIC access.
 *
 * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI.
 */
if (apic->wait_for_init_deassert)
cpuid = smp_processor_id();
if (apic->wait_for_init_deassert && cpuid != 0)
apic->wait_for_init_deassert(&init_deasserted);

/*
 * (This works even if the APIC is not enabled.)
 */
phys_id = read_apic_id();
cpuid = smp_processor_id();
if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
phys_id, cpuid);
@@ -228,6 +232,8 @@ static void __cpuinit smp_callin(void)
cpumask_set_cpu(cpuid, cpu_callin_mask);
}

static int cpu0_logical_apicid;
static int enable_start_cpu0;
/*
 * Activate a secondary processor.
 */
@@ -243,6 +249,8 @@ notrace static void __cpuinit start_secondary(void *unused)
preempt_disable();
smp_callin();

enable_start_cpu0 = 0;

#ifdef CONFIG_X86_32
/* switch away from the initial page table */
load_cr3(swapper_pg_dir);
@@ -279,19 +287,30 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}

void __init smp_store_boot_cpu_info(void)
{
int id = 0; /* CPU 0 */
struct cpuinfo_x86 *c = &cpu_data(id);

*c = boot_cpu_data;
c->cpu_index = id;
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);

*c = boot_cpu_data;
c->cpu_index = id;
if (id != 0)
identify_secondary_cpu(c);
/*
 * During boot time, CPU0 has this setup already. Save the info when
 * bringing up AP or offlined CPU0.
 */
identify_secondary_cpu(c);
}

static bool __cpuinit
@@ -313,7 +332,7 @@ do { \

static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (cpu_has_topoext) {
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

if (c->phys_proc_id == o->phys_proc_id &&
@@ -481,7 +500,7 @@ void __inquire_remote_apic(int apicid)
 * won't ... remember to clear down the APIC, etc later.
 */
int __cpuinit
wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt;
@@ -489,7 +508,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
/* Target chip */
/* Boot on the stack */
/* Kick the second */
apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid);

pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -649,6 +668,63 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
node, cpu, apicid);
}

static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
{
int cpu;

cpu = smp_processor_id();
if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
return NMI_HANDLED;

return NMI_DONE;
}

/*
 * Wake up AP by INIT, INIT, STARTUP sequence.
 *
 * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS
 * boot-strap code which is not a desired behavior for waking up BSP. To
 * avoid the boot-strap code, wake up CPU0 by NMI instead.
 *
 * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined
 * (i.e. physically hot removed and then hot added), NMI won't wake it up.
 * We'll change this code in the future to wake up hard offlined CPU0 if
 * real platform and request are available.
 */
static int __cpuinit
wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
int *cpu0_nmi_registered)
{
int id;
int boot_error;

/*
 * Wake up AP by INIT, INIT, STARTUP sequence.
 */
if (cpu)
return wakeup_secondary_cpu_via_init(apicid, start_ip);

/*
 * Wake up BSP by nmi.
 *
 * Register a NMI handler to help wake up CPU0.
 */
boot_error = register_nmi_handler(NMI_LOCAL,
wakeup_cpu0_nmi, 0, "wake_cpu0");

if (!boot_error) {
enable_start_cpu0 = 1;
*cpu0_nmi_registered = 1;
if (apic->dest_logical == APIC_DEST_LOGICAL)
id = cpu0_logical_apicid;
else
id = apicid;
boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
}

return boot_error;
}

/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -664,6 +740,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)

unsigned long boot_error = 0;
int timeout;
int cpu0_nmi_registered = 0;

/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
@@ -711,13 +788,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
}

/*
 * Kick the secondary CPU. Use the method in the APIC driver
 * if it's defined - or use an INIT boot APIC message otherwise:
 * Wake up a CPU in different cases:
 * - Use the method in the APIC driver if it's defined
 * Otherwise,
 * - Use an INIT boot APIC message for APs or NMI for BSP.
 */
if (apic->wakeup_secondary_cpu)
boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
else
boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
&cpu0_nmi_registered);

if (!boot_error) {
/*
@@ -782,6 +862,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 */
smpboot_restore_warm_reset_vector();
}
/*
 * Clean up the nmi handler. Do this after the callin and callout sync
 * to avoid impact of possible long unregister time.
 */
if (cpu0_nmi_registered)
unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");

return boot_error;
}

@@ -795,7 +882,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)

pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);

if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
if (apicid == BAD_APICID ||
!physid_isset(apicid, phys_cpu_present_map) ||
!apic->apic_id_valid(apicid)) {
pr_err("%s: bad cpu %d\n", __func__, cpu);
@@ -818,6 +905,9 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)

per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;

/* the FPU context is blank, nobody can own it */
__cpu_disable_lazy_restore(cpu);

err = do_boot_cpu(apicid, cpu, tidle);
if (err) {
pr_debug("do_boot_cpu failed %d\n", err);
@@ -990,7 +1080,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
/*
 * Setup boot CPU information
 */
smp_store_cpu_info(0); /* Final full version of the data */
smp_store_boot_cpu_info(); /* Final full version of the data */
cpumask_copy(cpu_callin_mask, cpumask_of(0));
mb();

@@ -1026,6 +1116,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 */
setup_local_APIC();

if (x2apic_mode)
cpu0_logical_apicid = apic_read(APIC_LDR);
else
cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));

/*
 * Enable IO APIC before setting up error vector
 */
@@ -1214,19 +1309,6 @@ void cpu_disable_common(void)

int native_cpu_disable(void)
{
int cpu = smp_processor_id();

/*
 * Perhaps use cpufreq to drop frequency, but that could go
 * into generic code.
 *
 * We won't take down the boot processor on i386 due to some
 * interrupts only being able to be serviced by the BSP.
 * Especially so if we're not using an IOAPIC -zwane
 */
if (cpu == 0)
return -EBUSY;

clear_local_APIC();

cpu_disable_common();
@@ -1266,6 +1348,14 @@ void play_dead_common(void)
local_irq_disable();
}

static bool wakeup_cpu0(void)
{
if (smp_processor_id() == 0 && enable_start_cpu0)
return true;

return false;
}

/*
 * We need to flush the caches before going to sleep, lest we have
 * dirty data in our caches when we come back up.
@@ -1329,6 +1419,11 @@ static inline void mwait_play_dead(void)
__monitor(mwait_ptr, 0, 0);
mb();
__mwait(eax, 0);
/*
 * If NMI wants to wake up CPU0, start CPU0.
 */
if (wakeup_cpu0())
start_cpu0();
}
}

@@ -1339,6 +1434,11 @@ static inline void hlt_play_dead(void)

while (1) {
native_halt();
/*
 * If NMI wants to wake up CPU0, start CPU0.
 */
if (wakeup_cpu0())
start_cpu0();
}
}

|
@@ -165,10 +165,11 @@ void set_task_blockstep(struct task_struct *task, bool on)
	 * Ensure irq/preemption can't change debugctl in between.
	 * Note also that both TIF_BLOCKSTEP and debugctl should
	 * be changed atomically wrt preemption.
	 * FIXME: this means that set/clear TIF_BLOCKSTEP is simply
	 * wrong if task != current, SIGKILL can wakeup the stopped
	 * tracee and set/clear can play with the running task, this
	 * can confuse the next __switch_to_xtra().
	 *
	 * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
	 * task is current or it can't be running, otherwise we can race
	 * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
	 * PTRACE_KILL is not safe.
	 */
	local_irq_disable();
	debugctl = get_debugctlmsr();

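The DEBUGCTL handling above can be inspected from user space. A minimal sketch, assuming root and a loaded msr module (modprobe msr): it reads IA32_DEBUGCTL, the register get_debugctlmsr() returns, through /dev/cpu/0/msr; 0x1d9 is the architectural MSR number and bit 1 is BTF, the block-step flag that TIF_BLOCKSTEP mirrors.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);	/* needs root + msr module */

	if (fd < 0)
		return 1;
	/* IA32_DEBUGCTL is MSR 0x1d9; bit 1 (BTF) enables block-step */
	if (pread(fd, &val, sizeof(val), 0x1d9) != sizeof(val)) {
		close(fd);
		return 1;
	}
	printf("IA32_DEBUGCTL = %#llx (BTF=%llu)\n",
	       (unsigned long long)val, (unsigned long long)((val >> 1) & 1));
	close(fd);
	return 0;
}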
@@ -21,37 +21,23 @@

/*
 * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
 *
 * @flags denotes the allocation direction - bottomup or topdown -
 * or vDSO; see call sites below.
 */
unsigned long align_addr(unsigned long addr, struct file *filp,
			 enum align_flags flags)
static unsigned long get_align_mask(void)
{
	unsigned long tmp_addr;

	/* handle 32- and 64-bit case with a single conditional */
	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
		return addr;
		return 0;

	if (!(current->flags & PF_RANDOMIZE))
		return addr;
		return 0;

	if (!((flags & ALIGN_VDSO) || filp))
		return addr;
	return va_align.mask;
}

	tmp_addr = addr;

	/*
	 * We need an address which is <= the original one only in
	 * the topdown direction.
	 */
	if (!(flags & ALIGN_TOPDOWN))
		tmp_addr += va_align.mask;

	tmp_addr &= ~va_align.mask;

	return tmp_addr;
unsigned long align_vdso_addr(unsigned long addr)
{
	unsigned long align_mask = get_align_mask();
	return (addr + align_mask) & ~align_mask;
}

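The round-up arithmetic align_vdso_addr() uses is worth seeing with concrete numbers. A minimal stand-alone sketch; the mask value is illustrative only (the kernel takes it from va_align.mask, which is not assumed here):

#include <stdio.h>

int main(void)
{
	unsigned long mask = 0xffffUL;		/* illustrative 64K - 1 mask */
	unsigned long addr = 0x12345678UL;

	/* round addr up to the next (mask + 1)-byte boundary */
	unsigned long aligned = (addr + mask) & ~mask;

	printf("%#lx -> %#lx\n", addr, aligned);	/* 0x12345678 -> 0x12350000 */
	return 0;
}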
static int __init control_va_addr_alignment(char *str)
@@ -126,7 +112,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;
	struct vm_unmapped_area_info info;
	unsigned long begin, end;

	if (flags & MAP_FIXED)
@@ -144,50 +130,16 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
	    (!vma || addr + len <= vma->vm_start))
		return addr;
	}
	if (((flags & MAP_32BIT) || test_thread_flag(TIF_ADDR32))
	    && len <= mm->cached_hole_size) {
		mm->cached_hole_size = 0;
		mm->free_area_cache = begin;
	}
	addr = mm->free_area_cache;
	if (addr < begin)
		addr = begin;
	start_addr = addr;

full_search:

	addr = align_addr(addr, filp, 0);

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point: (!vma || addr < vma->vm_end). */
		if (end - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != begin) {
				start_addr = addr = begin;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/*
			 * Remember the place where we stopped the search:
			 */
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;

		addr = vma->vm_end;
		addr = align_addr(addr, filp, 0);
	}
	info.flags = 0;
	info.length = len;
	info.low_limit = begin;
	info.high_limit = end;
	info.align_mask = filp ? get_align_mask() : 0;
	info.align_offset = pgoff << PAGE_SHIFT;
	return vm_unmapped_area(&info);
}

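The effect of the rewritten path can be observed from user space: a file-backed mmap() goes through arch_get_unmapped_area() above, and on hardware where va_align is active the returned address carries the extra alignment. A hedged demo; the file path is arbitrary and nothing about the kernel's mask is assumed:

#include <stdio.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/bin/true", O_RDONLY);	/* any readable file works */
	void *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		close(fd);
		return 1;
	}
	printf("file mapping at %p\n", p);	/* low bits show the alignment */
	munmap(p, 4096);
	close(fd);
	return 0;
}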
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			       const unsigned long len, const unsigned long pgoff,
@@ -195,7 +147,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0, start_addr;
	unsigned long addr = addr0;
	struct vm_unmapped_area_info info;

	/* requested length too big for entire address space */
	if (len > TASK_SIZE)
@@ -217,51 +170,16 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
		return addr;
	}

	/* check if free_area_cache is useful for us */
	if (len <= mm->cached_hole_size) {
		mm->cached_hole_size = 0;
		mm->free_area_cache = mm->mmap_base;
	}

try_again:
	/* either no address requested or can't fit in requested address hole */
	start_addr = addr = mm->free_area_cache;

	if (addr < len)
		goto fail;

	addr -= len;
	do {
		addr = align_addr(addr, filp, ALIGN_TOPDOWN);

		/*
		 * Lookup failure means no vma is above this address,
		 * else if new region fits below vma->vm_start,
		 * return with success:
		 */
		vma = find_vma(mm, addr);
		if (!vma || addr+len <= vma->vm_start)
			/* remember the address as a hint for next time */
			return mm->free_area_cache = addr;

		/* remember the largest hole we saw so far */
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = vma->vm_start-len;
	} while (len < vma->vm_start);

fail:
	/*
	 * if hint left us with no space for the requested
	 * mapping then try again:
	 */
	if (start_addr != mm->mmap_base) {
		mm->free_area_cache = mm->mmap_base;
		mm->cached_hole_size = 0;
		goto try_again;
	}
	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = PAGE_SIZE;
	info.high_limit = mm->mmap_base;
	info.align_mask = filp ? get_align_mask() : 0;
	info.align_offset = pgoff << PAGE_SHIFT;
	addr = vm_unmapped_area(&info);
	if (!(addr & ~PAGE_MASK))
		return addr;
	VM_BUG_ON(addr != -ENOMEM);

bottomup:
	/*
@@ -270,14 +188,5 @@ bottomup:
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->cached_hole_size = ~0UL;
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = mm->mmap_base;
	mm->cached_hole_size = ~0UL;

	return addr;
	return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
}

@@ -30,23 +30,110 @@
#include <linux/mmzone.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/irq.h>
#include <asm/cpu.h>

static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);

#ifdef CONFIG_HOTPLUG_CPU

#ifdef CONFIG_BOOTPARAM_HOTPLUG_CPU0
static int cpu0_hotpluggable = 1;
#else
static int cpu0_hotpluggable;
static int __init enable_cpu0_hotplug(char *str)
{
	cpu0_hotpluggable = 1;
	return 1;
}

__setup("cpu0_hotplug", enable_cpu0_hotplug);
#endif

#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
/*
 * This function offlines a CPU as early as possible and allows userspace to
 * boot up without the CPU. The CPU can be onlined back by the user after boot.
 *
 * This is only called for debugging the CPU offline/online feature.
 */
int __ref _debug_hotplug_cpu(int cpu, int action)
{
	struct device *dev = get_cpu_device(cpu);
	int ret;

	if (!cpu_is_hotpluggable(cpu))
		return -EINVAL;

	cpu_hotplug_driver_lock();

	switch (action) {
	case 0:
		ret = cpu_down(cpu);
		if (!ret) {
			pr_info("CPU %u is now offline\n", cpu);
			kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
		} else
			pr_debug("Can't offline CPU%d.\n", cpu);
		break;
	case 1:
		ret = cpu_up(cpu);
		if (!ret)
			kobject_uevent(&dev->kobj, KOBJ_ONLINE);
		else
			pr_debug("Can't online CPU%d.\n", cpu);
		break;
	default:
		ret = -EINVAL;
	}

	cpu_hotplug_driver_unlock();

	return ret;
}

static int __init debug_hotplug_cpu(void)
{
	_debug_hotplug_cpu(0, 0);
	return 0;
}

late_initcall_sync(debug_hotplug_cpu);
#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */

int __ref arch_register_cpu(int num)
{
	struct cpuinfo_x86 *c = &cpu_data(num);

	/*
	 * CPU0 cannot be offlined due to several
	 * restrictions and assumptions in the kernel. This basically
	 * doesn't add a control file; one cannot attempt to offline
	 * the BSP.
	 *
	 * Also, certain PCI quirks require not enabling hotplug control
	 * for all CPUs.
	 * Currently CPU0 is only hotpluggable on Intel platforms. Other
	 * vendors can add hotplug support later.
	 */
	if (num)
	if (c->x86_vendor != X86_VENDOR_INTEL)
		cpu0_hotpluggable = 0;

	/*
	 * Two known BSP/CPU0 dependencies: resume from suspend/hibernate
	 * depends on the BSP, and PIC interrupts depend on the BSP.
	 *
	 * If the BSP dependencies are under control, one can tell the kernel
	 * to enable BSP hotplug. This basically adds a control file and
	 * one can attempt to offline the BSP.
	 */
	if (num == 0 && cpu0_hotpluggable) {
		unsigned int irq;
		/*
		 * We won't take down the boot processor on i386 if some
		 * interrupts can only be serviced by the BSP in PIC mode.
		 */
		for_each_active_irq(irq) {
			if (!IO_APIC_IRQ(irq) && irq_has_action(irq)) {
				cpu0_hotpluggable = 0;
				break;
			}
		}
	}
	if (num || cpu0_hotpluggable)
		per_cpu(cpu_devices, num).cpu.hotpluggable = 1;

	return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
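The control file this function registers is what user space pokes to exercise CPU0 hotplug. A minimal sketch of the sysfs side, assuming a standard sysfs mount and a kernel booted with cpu0_hotplug (or CONFIG_BOOTPARAM_HOTPLUG_CPU0):

#include <stdio.h>

static int set_cpu_online(int cpu, int online)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/online", cpu);
	f = fopen(path, "w");	/* the file exists only if the CPU is hotpluggable */
	if (!f)
		return -1;
	fprintf(f, "%d\n", online);
	fclose(f);
	return 0;
}

int main(void)
{
	/* offline then online CPU0; both fail unless BSP hotplug is enabled */
	if (set_cpu_online(0, 0))
		return 1;
	return set_cpu_online(0, 1);
}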
arch/x86/kernel/trace_clock.c | 21 (new file)
@@ -0,0 +1,21 @@
/*
 * X86 trace clocks
 */
#include <asm/trace_clock.h>
#include <asm/barrier.h>
#include <asm/msr.h>

/*
 * trace_clock_x86_tsc(): A clock that is just the cycle counter.
 *
 * Unlike the other clocks, this is not in nanoseconds.
 */
u64 notrace trace_clock_x86_tsc(void)
{
	u64 ret;

	rdtsc_barrier();
	rdtscll(ret);

	return ret;
}
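The same raw-cycle reading is available to user space through the compiler's rdtsc intrinsic; this sketch only illustrates that the values are CPU cycles, not nanoseconds, as the comment above says. __rdtsc() is the GCC/Clang intrinsic, not the kernel's rdtscll():

#include <stdio.h>
#include <x86intrin.h>

int main(void)
{
	unsigned long long t0 = __rdtsc();
	unsigned long long t1 = __rdtsc();

	/* a raw cycle delta; convert with the TSC frequency if ns are needed */
	printf("delta = %llu cycles\n", t1 - t0);
	return 0;
}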
@@ -55,7 +55,7 @@
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mce.h>
#include <asm/rcu.h>
#include <asm/context_tracking.h>

#include <asm/mach_traps.h>

@@ -69,9 +69,6 @@

asmlinkage int system_call(void);

/* Do we ignore FPU interrupts? */
char ignore_fpu_irq;

/*
 * The IDT has to be page-aligned to simplify the Pentium
 * F0 0F bug workaround.
@@ -564,9 +561,6 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)

dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_X86_32
	ignore_fpu_irq = 1;
#endif
	exception_enter(regs);
	math_error(regs, error_code, X86_TRAP_MF);
	exception_exit(regs);
@@ -77,6 +77,12 @@ unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif

unsigned long long native_read_tsc(void)
{
	return __native_read_tsc();
}
EXPORT_SYMBOL(native_read_tsc);

int check_tsc_unstable(void)
{
	return tsc_unstable;
@@ -478,6 +478,11 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
	regs->ip = current->utask->xol_vaddr;
	pre_xol_rip_insn(auprobe, regs, autask);

	autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
	regs->flags |= X86_EFLAGS_TF;
	if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
		set_task_blockstep(current, false);

	return 0;
}

@@ -603,6 +608,16 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
	if (auprobe->fixups & UPROBE_FIX_CALL)
		result = adjust_ret_addr(regs->sp, correction);

	/*
	 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
	 * so we can get an extra SIGTRAP if we do not clear TF. We need
	 * to examine the opcode to make it right.
	 */
	if (utask->autask.saved_tf)
		send_sig(SIGTRAP, current, 0);
	else if (!(auprobe->fixups & UPROBE_FIX_SETF))
		regs->flags &= ~X86_EFLAGS_TF;

	return result;
}

@@ -647,6 +662,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
	current->thread.trap_nr = utask->autask.saved_trap_nr;
	handle_riprel_post_xol(auprobe, regs, NULL);
	instruction_pointer_set(regs, utask->vaddr);

	/* clear TF if it was set by us in arch_uprobe_pre_xol() */
	if (!utask->autask.saved_tf)
		regs->flags &= ~X86_EFLAGS_TF;
}

/*
@@ -676,38 +695,3 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
		send_sig(SIGTRAP, current, 0);
	return ret;
}

void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
{
	struct task_struct *task = current;
	struct arch_uprobe_task *autask = &task->utask->autask;
	struct pt_regs *regs = task_pt_regs(task);

	autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);

	regs->flags |= X86_EFLAGS_TF;
	if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
		set_task_blockstep(task, false);
}

void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
{
	struct task_struct *task = current;
	struct arch_uprobe_task *autask = &task->utask->autask;
	bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
	struct pt_regs *regs = task_pt_regs(task);
	/*
	 * The state of TIF_BLOCKSTEP was not saved so we can get an extra
	 * SIGTRAP if we do not clear TF. We need to examine the opcode to
	 * make it right.
	 */
	if (unlikely(trapped)) {
		if (!autask->saved_tf)
			regs->flags &= ~X86_EFLAGS_TF;
	} else {
		if (autask->saved_tf)
			send_sig(SIGTRAP, task, 0);
		else if (!(auprobe->fixups & UPROBE_FIX_SETF))
			regs->flags &= ~X86_EFLAGS_TF;
	}
}

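The TF save/restore logic above exists because EFLAGS.TF is the hardware single-step flag. A hedged illustration of the same flag driven from user space via ptrace(PTRACE_SINGLESTEP); this is not the uprobe machinery, just the underlying trap-per-instruction behavior:

#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		/* child: ask to be traced, then stop so the parent can step us */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		return 0;
	}

	waitpid(pid, NULL, 0);			/* child is stopped */

	for (int i = 0; i < 5; i++) {
		/* each request sets TF for exactly one instruction */
		ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL);
		waitpid(pid, NULL, 0);		/* SIGTRAP after that instruction */
	}

	ptrace(PTRACE_CONT, pid, NULL, NULL);
	waitpid(pid, NULL, 0);
	return 0;
}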
@@ -182,7 +182,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
	if (pud_none_or_clear_bad(pud))
		goto out;
	pmd = pmd_offset(pud, 0xA0000);
	split_huge_page_pmd(mm, pmd);
	split_huge_page_pmd_mm(mm, 0xA0000, pmd);
	if (pmd_none_or_clear_bad(pmd))
		goto out;
	pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
@@ -145,19 +145,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
	return nr;
}

#ifdef CONFIG_SECCOMP
static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
{
	if (!seccomp_mode(&tsk->seccomp))
		return 0;
	task_pt_regs(tsk)->orig_ax = syscall_nr;
	task_pt_regs(tsk)->ax = syscall_nr;
	return __secure_computing(syscall_nr);
}
#else
#define vsyscall_seccomp(_tsk, _nr) 0
#endif

static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
	/*
@@ -190,10 +177,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
	struct task_struct *tsk;
	unsigned long caller;
	int vsyscall_nr;
	int vsyscall_nr, syscall_nr, tmp;
	int prev_sig_on_uaccess_error;
	long ret;
	int skip;

	/*
	 * No point in checking CS -- the only way to get here is a user mode
@@ -225,6 +211,64 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
	}

	tsk = current;

	/*
	 * Check for access_ok violations and find the syscall nr.
	 *
	 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
	 * 64-bit, so we don't need to special-case it here. For all the
	 * vsyscalls, NULL means "don't write anything" not "write it at
	 * address 0".
	 */
	switch (vsyscall_nr) {
	case 0:
		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
		    !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
			ret = -EFAULT;
			goto check_fault;
		}

		syscall_nr = __NR_gettimeofday;
		break;

	case 1:
		if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
			ret = -EFAULT;
			goto check_fault;
		}

		syscall_nr = __NR_time;
		break;

	case 2:
		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
		    !write_ok_or_segv(regs->si, sizeof(unsigned))) {
			ret = -EFAULT;
			goto check_fault;
		}

		syscall_nr = __NR_getcpu;
		break;
	}

	/*
	 * Handle seccomp. regs->ip must be the original value.
	 * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt.
	 *
	 * We could optimize the seccomp disabled case, but performance
	 * here doesn't matter.
	 */
	regs->orig_ax = syscall_nr;
	regs->ax = -ENOSYS;
	tmp = secure_computing(syscall_nr);
	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
		warn_bad_vsyscall(KERN_DEBUG, regs,
				  "seccomp tried to change syscall nr or ip");
		do_exit(SIGSYS);
	}
	if (tmp)
		goto do_ret; /* skip requested */

	/*
	 * With a real vsyscall, page faults cause SIGSEGV. We want to
	 * preserve that behavior to make writing exploits harder.
@@ -232,49 +276,19 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
	prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
	current_thread_info()->sig_on_uaccess_error = 1;

	/*
	 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
	 * 64-bit, so we don't need to special-case it here. For all the
	 * vsyscalls, NULL means "don't write anything" not "write it at
	 * address 0".
	 */
	ret = -EFAULT;
	skip = 0;
	switch (vsyscall_nr) {
	case 0:
		skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
		if (skip)
			break;

		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
		    !write_ok_or_segv(regs->si, sizeof(struct timezone)))
			break;

		ret = sys_gettimeofday(
			(struct timeval __user *)regs->di,
			(struct timezone __user *)regs->si);
		break;

	case 1:
		skip = vsyscall_seccomp(tsk, __NR_time);
		if (skip)
			break;

		if (!write_ok_or_segv(regs->di, sizeof(time_t)))
			break;

		ret = sys_time((time_t __user *)regs->di);
		break;

	case 2:
		skip = vsyscall_seccomp(tsk, __NR_getcpu);
		if (skip)
			break;

		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
		    !write_ok_or_segv(regs->si, sizeof(unsigned)))
			break;

		ret = sys_getcpu((unsigned __user *)regs->di,
				 (unsigned __user *)regs->si,
				 NULL);
@@ -283,12 +297,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)

	current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;

	if (skip) {
		if ((long)regs->ax <= 0L) /* seccomp errno emulation */
			goto do_ret;
		goto done; /* seccomp trace/trap */
	}

check_fault:
	if (ret == -EFAULT) {
		/* Bad news -- userspace fed a bad pointer to a vsyscall. */
		warn_bad_vsyscall(KERN_INFO, regs,
@@ -311,7 +320,6 @@ do_ret:
	/* Emulate a ret instruction. */
	regs->ip = caller;
	regs->sp += 8;
done:
	return true;

sigsegv:
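The emulation path above can be exercised directly. A minimal sketch, assuming an x86-64 kernel with the vsyscall page present (vsyscall=emulate or native); the fixed addresses are the long-standing vsyscall ABI (gettimeofday at 0xffffffffff600000, time at +0x400, getcpu at +0x800), and on kernels with the page disabled the call simply faults:

#include <stdio.h>
#include <time.h>

typedef time_t (*vsys_time_t)(time_t *);

int main(void)
{
	/* the legacy vsyscall page places time() at this fixed address */
	vsys_time_t vtime = (vsys_time_t)0xffffffffff600400UL;

	time_t t = vtime(NULL);		/* traps and lands in emulate_vsyscall() */
	printf("time() via vsyscall page: %ld\n", (long)t);
	return 0;
}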