Merge branch 'master' into for-next

Conflicts:
	drivers/devfreq/exynos4_bus.c

Sync with Linus' tree to be able to apply patches that are
against newer code (mvneta).
This commit is contained in:
Jiri Kosina
2013-01-29 10:48:30 +01:00
11382 fájl változott, egészen pontosan 531475 új sor hozzáadva és 303353 régi sor törölve

Fájl megtekintése

@@ -9,7 +9,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_pvclock.o = -pg
CFLAGS_REMOVE_kvmclock.o = -pg
@@ -62,6 +61,7 @@ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_X86_TSC) += trace_clock.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o

Fájl megtekintése

@@ -574,6 +574,12 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
return irq;
}
EXPORT_SYMBOL_GPL(acpi_register_gsi);
/*
 * acpi_unregister_gsi - release a GSI.
 * @gsi: the global system interrupt number (ignored by this implementation).
 *
 * Currently a no-op: the body is empty, so nothing is torn down.
 * NOTE(review): presumably exists so callers have a symmetric API next to
 * acpi_register_gsi() -- confirm against the callers.
 */
void acpi_unregister_gsi(u32 gsi)
{
}
EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
void __init acpi_set_irq_model_pic(void)
{
@@ -1700,3 +1706,9 @@ int __acpi_release_global_lock(unsigned int *lock)
} while (unlikely (val != old));
return old & 0x1;
}
/*
 * arch_reserve_mem_area - mark a physical range as ACPI memory in the e820 map.
 * @addr: start of the physical range.
 * @size: length of the range in bytes.
 *
 * Adds the range to the e820 table with type E820_ACPI and then calls
 * update_e820() so the change is processed.
 */
void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
{
/* Register the region first; update_e820() must run after the addition. */
e820_add_region(addr, size, E820_ACPI);
update_e820();
}

Fájl megtekintése

@@ -101,6 +101,8 @@ static int __init acpi_sleep_setup(char *str)
#endif
if (strncmp(str, "nonvs", 5) == 0)
acpi_nvs_nosave();
if (strncmp(str, "nonvs_s3", 8) == 0)
acpi_nvs_nosave_s3();
if (strncmp(str, "old_ordering", 12) == 0)
acpi_old_suspend_ordering();
str = strchr(str, ',');

Fájl megtekintése

@@ -90,21 +90,6 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
*/
DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
/*
* Knob to control our willingness to enable the local APIC.
*
* +1=force-enable
*/
static int force_enable_local_apic __initdata;
/*
* APIC command line parameters
*/
static int __init parse_lapic(char *arg)
{
force_enable_local_apic = 1;
return 0;
}
early_param("lapic", parse_lapic);
/* Local APIC was disabled by the BIOS and enabled by the kernel */
static int enabled_via_apicbase;
@@ -133,6 +118,25 @@ static inline void imcr_apic_to_pic(void)
}
#endif
/*
* Knob to control our willingness to enable the local APIC.
*
* +1=force-enable
*/
static int force_enable_local_apic __initdata;
/*
* APIC command line parameters
*/
/*
 * parse_lapic - handle the "lapic" early boot parameter.
 * @arg: text following "lapic=", or NULL when the option carries no value.
 *
 * A bare "lapic" on a 32-bit build sets force_enable_local_apic.
 * "lapic=notscdeadline" clears the TSC deadline timer CPU capability.
 * Anything else is silently ignored.
 *
 * Always returns 0.
 */
static int __init parse_lapic(char *arg)
{
	if (!arg) {
		/*
		 * No value given. Only 32-bit honours the force-enable knob;
		 * on other builds there is nothing to do, and we must not
		 * fall through to strncmp() with a NULL pointer.
		 */
		if (config_enabled(CONFIG_X86_32))
			force_enable_local_apic = 1;
	} else if (!strncmp(arg, "notscdeadline", 13)) {
		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
	}

	return 0;
}
early_param("lapic", parse_lapic);
#ifdef CONFIG_X86_64
static int apic_calibrate_pmtmr __initdata;
static __init int setup_apicpmtimer(char *s)
@@ -315,6 +319,7 @@ int lapic_get_maxlvt(void)
/* Clock divisor */
#define APIC_DIVISOR 16
#define TSC_DIVISOR 32
/*
* This function sets up the local APIC timer, with a timeout of
@@ -333,6 +338,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
lvtt_value = LOCAL_TIMER_VECTOR;
if (!oneshot)
lvtt_value |= APIC_LVT_TIMER_PERIODIC;
else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
if (!lapic_is_integrated())
lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
@@ -341,6 +349,11 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
apic_write(APIC_LVTT, lvtt_value);
if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}
/*
* Divide PICLK by 16
*/
@@ -453,6 +466,16 @@ static int lapic_next_event(unsigned long delta,
return 0;
}
/*
 * lapic_next_deadline - program the next timer event via the TSC deadline MSR.
 * @delta: distance to the next event, in clockevent ticks; each tick is
 *         scaled by TSC_DIVISOR before being added to the current TSC.
 * @evt:   clock event device (unused here).
 *
 * Reads the current TSC and writes "now + delta * TSC_DIVISOR" to
 * MSR_IA32_TSC_DEADLINE. Always returns 0.
 */
static int lapic_next_deadline(unsigned long delta,
			       struct clock_event_device *evt)
{
	u64 now;

	rdtscll(now);
	wrmsrl(MSR_IA32_TSC_DEADLINE, now + (u64)delta * TSC_DIVISOR);

	return 0;
}
/*
* Setup the lapic timer in periodic or oneshot mode
*/
@@ -533,7 +556,15 @@ static void __cpuinit setup_APIC_timer(void)
memcpy(levt, &lapic_clockevent, sizeof(*levt));
levt->cpumask = cpumask_of(smp_processor_id());
clockevents_register_device(levt);
if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_DUMMY);
levt->set_next_event = lapic_next_deadline;
clockevents_config_and_register(levt,
(tsc_khz / TSC_DIVISOR) * 1000,
0xF, ~0UL);
} else
clockevents_register_device(levt);
}
/*
@@ -661,7 +692,9 @@ static int __init calibrate_APIC_clock(void)
* in the clockevent structure and return.
*/
if (lapic_timer_frequency) {
if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
return 0;
} else if (lapic_timer_frequency) {
apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
lapic_timer_frequency);
lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
@@ -674,6 +707,9 @@ static int __init calibrate_APIC_clock(void)
return 0;
}
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
"calibrating APIC timer ...\n");
local_irq_disable();
/* Replace the global interrupt handler */
@@ -811,9 +847,6 @@ void __init setup_boot_APIC_clock(void)
return;
}
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
"calibrating APIC timer ...\n");
if (calibrate_APIC_clock()) {
/* No broadcast on UP ! */
if (num_possible_cpus() > 1)

Fájl megtekintése

@@ -22,6 +22,7 @@
#include <linux/hardirq.h>
#include <linux/delay.h>
#include <asm/numachip/numachip.h>
#include <asm/numachip/numachip_csr.h>
#include <asm/smp.h>
#include <asm/apic.h>
@@ -179,6 +180,7 @@ static int __init numachip_system_init(void)
return 0;
x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
x86_init.pci.arch_init = pci_numachip_init;
map_csrs();

Fájl megtekintése

@@ -234,11 +234,11 @@ int __init arch_early_irq_init(void)
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
/*
* For legacy IRQ's, start with assigning irq0 to irq15 to
* IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
* IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
*/
if (i < legacy_pic->nr_legacy_irqs) {
cfg[i].vector = IRQ0_VECTOR + i;
cpumask_set_cpu(0, cfg[i].domain);
cpumask_setall(cfg[i].domain);
}
}
@@ -1141,7 +1141,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
* allocation for the members that are not used anymore.
*/
cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
cfg->move_in_progress = 1;
cfg->move_in_progress =
cpumask_intersects(cfg->old_domain, cpu_online_mask);
cpumask_and(cfg->domain, cfg->domain, tmp_mask);
break;
}
@@ -1172,8 +1173,9 @@ next:
current_vector = vector;
current_offset = offset;
if (cfg->vector) {
cfg->move_in_progress = 1;
cpumask_copy(cfg->old_domain, cfg->domain);
cfg->move_in_progress =
cpumask_intersects(cfg->old_domain, cpu_online_mask);
}
for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq;
@@ -1241,12 +1243,6 @@ void __setup_vector_irq(int cpu)
cfg = irq_get_chip_data(irq);
if (!cfg)
continue;
/*
* If it is a legacy IRQ handled by the legacy PIC, this cpu
* will be part of the irq_cfg's domain.
*/
if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
cpumask_set_cpu(cpu, cfg->domain);
if (!cpumask_test_cpu(cpu, cfg->domain))
continue;
@@ -1356,16 +1352,6 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
if (!IO_APIC_IRQ(irq))
return;
/*
* For legacy irqs, cfg->domain starts with cpu 0. Now that IO-APIC
* can handle this irq and the apic driver is finalized at this point,
* update the cfg->domain.
*/
if (irq < legacy_pic->nr_legacy_irqs &&
cpumask_equal(cfg->domain, cpumask_of(0)))
apic->vector_allocation_domain(0, cfg->domain,
apic->target_cpus());
if (assign_irq_vector(irq, cfg, apic->target_cpus()))
return;
@@ -2199,9 +2185,11 @@ static int ioapic_retrigger_irq(struct irq_data *data)
{
struct irq_cfg *cfg = data->chip_data;
unsigned long flags;
int cpu;
raw_spin_lock_irqsave(&vector_lock, flags);
apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
raw_spin_unlock_irqrestore(&vector_lock, flags);
return 1;
@@ -3317,8 +3305,9 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
int ret;
if (irq_remapping_enabled) {
if (!setup_hpet_msi_remapped(irq, id))
return -1;
ret = setup_hpet_msi_remapped(irq, id);
if (ret)
return ret;
}
ret = msi_compose_msg(NULL, irq, &msg, id);

Fájl megtekintése

@@ -304,7 +304,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
int cpu = smp_processor_id();
/* get information required for multi-node processors */
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (cpu_has_topoext) {
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -631,6 +631,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
}
/*
* The way access filter has a performance penalty on some workloads.
* Disable it on the affected CPUs.
*/
if ((c->x86 == 0x15) &&
(c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
u64 val;
if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) {
val |= 0x1E;
wrmsrl_safe(0xc0011021, val);
}
}
cpu_detect_cache_sizes(c);
/* Multi core CPU? */
@@ -643,12 +657,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
detect_ht(c);
#endif
if (c->extended_cpuid_level >= 0x80000006) {
if (cpuid_edx(0x80000006) & 0xf000)
num_cache_leaves = 4;
else
num_cache_leaves = 3;
}
init_amd_cacheinfo(c);
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
@@ -739,9 +748,6 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
{
if (!cpu_has_invlpg)
return;
tlb_flushall_shift = 5;
if (c->x86 <= 0x11)

Fájl megtekintése

@@ -106,54 +106,18 @@ static void __init check_hlt(void)
pr_cont("OK\n");
}
/*
* Most 386 processors have a bug where a POPAD can lock the
* machine even from user space.
*/
static void __init check_popad(void)
{
#ifndef CONFIG_X86_POPAD_OK
int res, inp = (int) &res;
pr_info("Checking for popad bug... ");
__asm__ __volatile__(
"movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
: "=&a" (res)
: "d" (inp)
: "ecx", "edi");
/*
* If this fails, it means that any user program may lock the
* CPU hard. Too bad.
*/
if (res != 12345678)
pr_cont("Buggy\n");
else
pr_cont("OK\n");
#endif
}
/*
* Check whether we are able to run this kernel safely on SMP.
*
* - In order to run on a i386, we need to be compiled for i386
* (for due to lack of "invlpg" and working WP on a i386)
* - i386 is no longer supported.
* - In order to run on anything without a TSC, we need to be
* compiled for a i486.
*/
static void __init check_config(void)
{
/*
* We'd better not be a i386 if we're configured to use some
* i486+ only features! (WP works in supervisor mode and the
* new "invlpg" and "bswap" instructions)
*/
#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
defined(CONFIG_X86_BSWAP)
if (boot_cpu_data.x86 == 3)
if (boot_cpu_data.x86 < 4)
panic("Kernel requires i486+ for 'invlpg' and other features");
#endif
}
@@ -166,7 +130,6 @@ void __init check_bugs(void)
#endif
check_config();
check_hlt();
check_popad();
init_utsname()->machine[1] =
'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
alternative_instructions();

Fájl megtekintése

@@ -1173,15 +1173,6 @@ DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
/* Make sure %fs and %gs are initialized properly in idle threads */
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU;
regs->gs = __KERNEL_STACK_CANARY;
return regs;
}
#endif /* CONFIG_X86_64 */
/*
@@ -1237,7 +1228,7 @@ void __cpuinit cpu_init(void)
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
if (this_cpu_read(numa_node) == 0 &&
early_cpu_to_node(cpu) != NUMA_NO_NODE)
set_numa_node(early_cpu_to_node(cpu));
#endif
@@ -1269,8 +1260,7 @@ void __cpuinit cpu_init(void)
barrier();
x86_configure_nx();
if (cpu != 0)
enable_x2apic();
enable_x2apic();
/*
* set up and load the per-CPU TSS

Fájl megtekintése

@@ -612,10 +612,6 @@ static void __cpuinit intel_tlb_lookup(const unsigned char desc)
static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
{
if (!cpu_has_invlpg) {
tlb_flushall_shift = -1;
return;
}
switch ((c->x86 << 8) + c->x86_model) {
case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */

Fájl megtekintése

@@ -538,7 +538,11 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
unsigned edx;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
if (cpu_has_topoext)
cpuid_count(0x8000001d, index, &eax.full,
&ebx.full, &ecx.full, &edx);
else
amd_cpuid4(index, &eax, &ebx, &ecx);
amd_init_l3_cache(this_leaf, index);
} else {
cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
@@ -557,21 +561,39 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
return 0;
}
static int __cpuinit find_num_cache_leaves(void)
static int __cpuinit find_num_cache_leaves(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx;
unsigned int eax, ebx, ecx, edx, op;
union _cpuid4_leaf_eax cache_eax;
int i = -1;
if (c->x86_vendor == X86_VENDOR_AMD)
op = 0x8000001d;
else
op = 4;
do {
++i;
/* Do cpuid(4) loop to find out num_cache_leaves */
cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
/* Do cpuid(op) loop to find out num_cache_leaves */
cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
cache_eax.full = eax;
} while (cache_eax.split.type != CACHE_TYPE_NULL);
return i;
}
/*
 * init_amd_cacheinfo - set num_cache_leaves for an AMD CPU.
 * @c: CPU being initialised.
 *
 * With the topology-extension feature the leaf count is enumerated via
 * find_num_cache_leaves(). Otherwise, if extended CPUID leaf 0x80000006
 * is available, the count is 4 when bits 15:12 of its EDX are non-zero
 * and 3 when they are zero. If neither applies, num_cache_leaves is left
 * unchanged.
 */
void __cpuinit init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	if (cpu_has_topoext) {
		num_cache_leaves = find_num_cache_leaves(c);
		return;
	}

	/* Without leaf 0x80000006 there is nothing to derive the count from. */
	if (c->extended_cpuid_level < 0x80000006)
		return;

	num_cache_leaves = (cpuid_edx(0x80000006) & 0xf000) ? 4 : 3;
}
unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
/* Cache sizes */
@@ -588,7 +610,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
if (is_initialized == 0) {
/* Init num_cache_leaves from boot CPU */
num_cache_leaves = find_num_cache_leaves();
num_cache_leaves = find_num_cache_leaves(c);
is_initialized++;
}
@@ -728,12 +750,36 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
{
struct _cpuid4_info *this_leaf;
int ret, i, sibling;
struct cpuinfo_x86 *c = &cpu_data(cpu);
int i, sibling;
ret = 0;
if (index == 3) {
ret = 1;
if (cpu_has_topoext) {
unsigned int apicid, nshared, first, last;
if (!per_cpu(ici_cpuid4_info, cpu))
return 0;
this_leaf = CPUID4_INFO_IDX(cpu, index);
nshared = this_leaf->base.eax.split.num_threads_sharing + 1;
apicid = cpu_data(cpu).apicid;
first = apicid - (apicid % nshared);
last = first + nshared - 1;
for_each_online_cpu(i) {
apicid = cpu_data(i).apicid;
if ((apicid < first) || (apicid > last))
continue;
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);
for_each_online_cpu(sibling) {
apicid = cpu_data(sibling).apicid;
if ((apicid < first) || (apicid > last))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
} else if (index == 3) {
for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
@@ -744,21 +790,10 @@ static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
} else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
ret = 1;
for_each_cpu(i, cpu_sibling_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);
for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
if (!cpu_online(sibling))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
}
} else
return 0;
return ret;
return 1;
}
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)

Fájl megtekintése

@@ -24,8 +24,6 @@ struct mce_bank {
int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void);
extern int mce_ser;
extern struct mce_bank *mce_banks;
#ifdef CONFIG_X86_MCE_INTEL

Fájl megtekintése

@@ -193,9 +193,9 @@ int mce_severity(struct mce *m, int tolerant, char **msg)
continue;
if ((m->mcgstatus & s->mcgmask) != s->mcgres)
continue;
if (s->ser == SER_REQUIRED && !mce_ser)
if (s->ser == SER_REQUIRED && !mca_cfg.ser)
continue;
if (s->ser == NO_SER && mce_ser)
if (s->ser == NO_SER && mca_cfg.ser)
continue;
if (s->context && ctx != s->context)
continue;

Fájl megtekintése

@@ -58,34 +58,26 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
int mce_disabled __read_mostly;
#define SPINUNIT 100 /* 100ns */
atomic_t mce_entry;
DEFINE_PER_CPU(unsigned, mce_exception_count);
/*
* Tolerant levels:
* 0: always panic on uncorrected errors, log corrected errors
* 1: panic or SIGBUS on uncorrected errors, log corrected errors
* 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
* 3: never panic or SIGBUS, log all errors (for testing only)
*/
static int tolerant __read_mostly = 1;
static int banks __read_mostly;
static int rip_msr __read_mostly;
static int mce_bootlog __read_mostly = -1;
static int monarch_timeout __read_mostly = -1;
static int mce_panic_timeout __read_mostly;
static int mce_dont_log_ce __read_mostly;
int mce_cmci_disabled __read_mostly;
int mce_ignore_ce __read_mostly;
int mce_ser __read_mostly;
int mce_bios_cmci_threshold __read_mostly;
struct mce_bank *mce_banks __read_mostly;
struct mce_bank *mce_banks __read_mostly;
struct mca_config mca_cfg __read_mostly = {
.bootlog = -1,
/*
* Tolerant levels:
* 0: always panic on uncorrected errors, log corrected errors
* 1: panic or SIGBUS on uncorrected errors, log corrected errors
* 2: SIGBUS or log uncorrected errors (if possible), log corr. errors
* 3: never panic or SIGBUS, log all errors (for testing only)
*/
.tolerant = 1,
.monarch_timeout = -1
};
/* User mode helper program triggered by machine check event */
static unsigned long mce_need_notify;
@@ -302,7 +294,7 @@ static void wait_for_panic(void)
while (timeout-- > 0)
udelay(1);
if (panic_timeout == 0)
panic_timeout = mce_panic_timeout;
panic_timeout = mca_cfg.panic_timeout;
panic("Panicing machine check CPU died");
}
@@ -360,7 +352,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
pr_emerg(HW_ERR "Machine check: %s\n", exp);
if (!fake_panic) {
if (panic_timeout == 0)
panic_timeout = mce_panic_timeout;
panic_timeout = mca_cfg.panic_timeout;
panic(msg);
} else
pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
@@ -372,7 +364,7 @@ static int msr_to_offset(u32 msr)
{
unsigned bank = __this_cpu_read(injectm.bank);
if (msr == rip_msr)
if (msr == mca_cfg.rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
return offsetof(struct mce, status);
@@ -451,8 +443,8 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
m->cs |= 3;
}
/* Use accurate RIP reporting if available. */
if (rip_msr)
m->ip = mce_rdmsrl(rip_msr);
if (mca_cfg.rip_msr)
m->ip = mce_rdmsrl(mca_cfg.rip_msr);
}
}
@@ -513,7 +505,7 @@ static int mce_ring_add(unsigned long pfn)
int mce_available(struct cpuinfo_x86 *c)
{
if (mce_disabled)
if (mca_cfg.disabled)
return 0;
return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}
@@ -565,7 +557,7 @@ static void mce_read_aux(struct mce *m, int i)
/*
* Mask the reported address by the reported granularity.
*/
if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) {
u8 shift = MCI_MISC_ADDR_LSB(m->misc);
m->addr >>= shift;
m->addr <<= shift;
@@ -599,7 +591,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_gather_info(&m, NULL);
for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
@@ -620,7 +612,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* TBD do the same check for MCI_STATUS_EN here?
*/
if (!(flags & MCP_UC) &&
(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
(m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;
mce_read_aux(&m, i);
@@ -631,7 +623,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
mce_log(&m);
/*
@@ -658,14 +650,14 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
{
int i, ret = 0;
for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
if (quirk_no_way_out)
quirk_no_way_out(i, m, regs);
}
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY)
ret = 1;
}
return ret;
@@ -696,11 +688,11 @@ static int mce_timed_out(u64 *t)
rmb();
if (atomic_read(&mce_paniced))
wait_for_panic();
if (!monarch_timeout)
if (!mca_cfg.monarch_timeout)
goto out;
if ((s64)*t < SPINUNIT) {
/* CHECKME: Make panic default for 1 too? */
if (tolerant < 1)
if (mca_cfg.tolerant < 1)
mce_panic("Timeout synchronizing machine check over CPUs",
NULL, NULL);
cpu_missing = 1;
@@ -750,7 +742,8 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant,
int severity = mce_severity(&per_cpu(mces_seen, cpu),
mca_cfg.tolerant,
&nmsg);
if (severity > global_worst) {
msg = nmsg;
@@ -764,7 +757,7 @@ static void mce_reign(void)
* This dumps all the mces in the log buffer and stops the
* other CPUs.
*/
if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3)
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
mce_panic("Fatal Machine check", m, msg);
/*
@@ -777,7 +770,7 @@ static void mce_reign(void)
* No machine check event found. Must be some external
* source or one CPU is hung. Panic.
*/
if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
mce_panic("Machine check from unknown source", NULL, NULL);
/*
@@ -801,7 +794,7 @@ static int mce_start(int *no_way_out)
{
int order;
int cpus = num_online_cpus();
u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
if (!timeout)
return -1;
@@ -865,7 +858,7 @@ static int mce_start(int *no_way_out)
static int mce_end(int order)
{
int ret = -1;
u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
if (!timeout)
goto reset;
@@ -946,7 +939,7 @@ static void mce_clear_state(unsigned long *toclear)
{
int i;
for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
if (test_bit(i, toclear))
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
}
@@ -1011,6 +1004,7 @@ static void mce_clear_info(struct mce_info *mi)
*/
void do_machine_check(struct pt_regs *regs, long error_code)
{
struct mca_config *cfg = &mca_cfg;
struct mce m, *final;
int i;
int worst = 0;
@@ -1022,7 +1016,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int order;
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If tolerant is cranked up, we'll try anyway.
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
*/
int no_way_out = 0;
/*
@@ -1038,7 +1032,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
this_cpu_inc(mce_exception_count);
if (!banks)
if (!cfg->banks)
goto out;
mce_gather_info(&m, regs);
@@ -1065,7 +1059,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* because the first one to see it will clear it.
*/
order = mce_start(&no_way_out);
for (i = 0; i < banks; i++) {
for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
if (!test_bit(i, valid_banks))
continue;
@@ -1084,7 +1078,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* Non uncorrected or non signaled errors are handled by
* machine_check_poll. Leave them alone, unless this panics.
*/
if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
!no_way_out)
continue;
@@ -1093,7 +1087,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*/
add_taint(TAINT_MACHINE_CHECK);
severity = mce_severity(&m, tolerant, NULL);
severity = mce_severity(&m, cfg->tolerant, NULL);
/*
* When machine check was for corrected handler don't touch,
@@ -1117,7 +1111,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* When the ring overflows we just ignore the AO error.
* RED-PEN add some logging mechanism when
* usable_address or mce_add_ring fails.
* RED-PEN don't ignore overflow for tolerant == 0
* RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
*/
if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
mce_ring_add(m.addr >> PAGE_SHIFT);
@@ -1149,7 +1143,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* issues we try to recover, or limit damage to the current
* process.
*/
if (tolerant < 3) {
if (cfg->tolerant < 3) {
if (no_way_out)
mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst == MCE_AR_SEVERITY) {
@@ -1377,11 +1371,13 @@ EXPORT_SYMBOL_GPL(mce_notify_irq);
static int __cpuinit __mcheck_cpu_mce_banks_init(void)
{
int i;
u8 num_banks = mca_cfg.banks;
mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
if (!mce_banks)
return -ENOMEM;
for (i = 0; i < banks; i++) {
for (i = 0; i < num_banks; i++) {
struct mce_bank *b = &mce_banks[i];
b->ctl = -1ULL;
@@ -1401,7 +1397,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
b = cap & MCG_BANKCNT_MASK;
if (!banks)
if (!mca_cfg.banks)
pr_info("CPU supports %d MCE banks\n", b);
if (b > MAX_NR_BANKS) {
@@ -1411,8 +1407,9 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
}
/* Don't support asymmetric configurations today */
WARN_ON(banks != 0 && b != banks);
banks = b;
WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks);
mca_cfg.banks = b;
if (!mce_banks) {
int err = __mcheck_cpu_mce_banks_init();
@@ -1422,25 +1419,29 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
/* Use accurate RIP reporting if available. */
if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
rip_msr = MSR_IA32_MCG_EIP;
mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
if (cap & MCG_SER_P)
mce_ser = 1;
mca_cfg.ser = true;
return 0;
}
static void __mcheck_cpu_init_generic(void)
{
enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
u64 cap;
int i;
if (!mca_cfg.bootlog)
m_fl = MCP_DONTLOG;
/*
* Log the machine checks left over from the previous reset.
*/
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
machine_check_poll(MCP_UC | m_fl, &all_banks);
set_in_cr4(X86_CR4_MCE);
@@ -1448,7 +1449,7 @@ static void __mcheck_cpu_init_generic(void)
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
@@ -1489,6 +1490,8 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
/* Add per CPU specific workarounds here */
static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
struct mca_config *cfg = &mca_cfg;
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("unknown CPU type - not enabling MCE support\n");
return -EOPNOTSUPP;
@@ -1496,7 +1499,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD) {
if (c->x86 == 15 && banks > 4) {
if (c->x86 == 15 && cfg->banks > 4) {
/*
* disable GART TBL walk error reporting, which
* trips off incorrectly with the IOMMU & 3ware
@@ -1504,18 +1507,18 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
if (c->x86 <= 17 && mce_bootlog < 0) {
if (c->x86 <= 17 && cfg->bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
* by default and leave crap in there. Don't log:
*/
mce_bootlog = 0;
cfg->bootlog = 0;
}
/*
* Various K7s with broken bank 0 around. Always disable
* by default.
*/
if (c->x86 == 6 && banks > 0)
if (c->x86 == 6 && cfg->banks > 0)
mce_banks[0].ctl = 0;
/*
@@ -1566,7 +1569,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* valid event later, merely don't write CTL0.
*/
if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
mce_banks[0].init = 0;
/*
@@ -1574,23 +1577,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* synchronization with a one second timeout.
*/
if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
monarch_timeout < 0)
monarch_timeout = USEC_PER_SEC;
cfg->monarch_timeout < 0)
cfg->monarch_timeout = USEC_PER_SEC;
/*
* There are also broken BIOSes on some Pentium M and
* earlier systems:
*/
if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
mce_bootlog = 0;
if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
cfg->bootlog = 0;
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
}
if (monarch_timeout < 0)
monarch_timeout = 0;
if (mce_bootlog != 0)
mce_panic_timeout = 30;
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
if (cfg->bootlog != 0)
cfg->panic_timeout = 30;
return 0;
}
@@ -1635,7 +1638,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
__this_cpu_write(mce_next_interval, iv);
if (mce_ignore_ce || !iv)
if (mca_cfg.ignore_ce || !iv)
return;
t->expires = round_jiffies(jiffies + iv);
@@ -1668,7 +1671,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
*/
void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
{
if (mce_disabled)
if (mca_cfg.disabled)
return;
if (__mcheck_cpu_ancient_init(c))
@@ -1678,7 +1681,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
return;
if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
mce_disabled = 1;
mca_cfg.disabled = true;
return;
}
@@ -1951,6 +1954,8 @@ static struct miscdevice mce_chrdev_device = {
*/
static int __init mcheck_enable(char *str)
{
struct mca_config *cfg = &mca_cfg;
if (*str == 0) {
enable_p5_mce();
return 1;
@@ -1958,22 +1963,22 @@ static int __init mcheck_enable(char *str)
if (*str == '=')
str++;
if (!strcmp(str, "off"))
mce_disabled = 1;
cfg->disabled = true;
else if (!strcmp(str, "no_cmci"))
mce_cmci_disabled = 1;
cfg->cmci_disabled = true;
else if (!strcmp(str, "dont_log_ce"))
mce_dont_log_ce = 1;
cfg->dont_log_ce = true;
else if (!strcmp(str, "ignore_ce"))
mce_ignore_ce = 1;
cfg->ignore_ce = true;
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
mce_bootlog = (str[0] == 'b');
cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
mce_bios_cmci_threshold = 1;
cfg->bios_cmci_threshold = true;
else if (isdigit(str[0])) {
get_option(&str, &tolerant);
get_option(&str, &(cfg->tolerant));
if (*str == ',') {
++str;
get_option(&str, &monarch_timeout);
get_option(&str, &(cfg->monarch_timeout));
}
} else {
pr_info("mce argument %s ignored. Please use /sys\n", str);
@@ -2002,7 +2007,7 @@ static int mce_disable_error_reporting(void)
{
int i;
for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2142,15 +2147,15 @@ static ssize_t set_ignore_ce(struct device *s,
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;
if (mce_ignore_ce ^ !!new) {
if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
mce_timer_delete_all();
on_each_cpu(mce_disable_cmci, NULL, 1);
mce_ignore_ce = 1;
mca_cfg.ignore_ce = true;
} else {
/* enable ce features */
mce_ignore_ce = 0;
mca_cfg.ignore_ce = false;
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
@@ -2166,14 +2171,14 @@ static ssize_t set_cmci_disabled(struct device *s,
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;
if (mce_cmci_disabled ^ !!new) {
if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
on_each_cpu(mce_disable_cmci, NULL, 1);
mce_cmci_disabled = 1;
mca_cfg.cmci_disabled = true;
} else {
/* enable cmci */
mce_cmci_disabled = 0;
mca_cfg.cmci_disabled = false;
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
@@ -2190,9 +2195,9 @@ static ssize_t store_int_with_restart(struct device *s,
}
static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
static DEVICE_INT_ATTR(tolerant, 0644, tolerant);
static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
static struct dev_ext_attribute dev_attr_check_interval = {
__ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
@@ -2200,13 +2205,13 @@ static struct dev_ext_attribute dev_attr_check_interval = {
};
static struct dev_ext_attribute dev_attr_ignore_ce = {
__ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce),
&mce_ignore_ce
__ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce),
&mca_cfg.ignore_ce
};
static struct dev_ext_attribute dev_attr_cmci_disabled = {
__ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled),
&mce_cmci_disabled
__ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled),
&mca_cfg.cmci_disabled
};
static struct device_attribute *mce_device_attrs[] = {
@@ -2253,7 +2258,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
if (err)
goto error;
}
for (j = 0; j < banks; j++) {
for (j = 0; j < mca_cfg.banks; j++) {
err = device_create_file(dev, &mce_banks[j].attr);
if (err)
goto error2;
@@ -2285,7 +2290,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
for (i = 0; mce_device_attrs[i]; i++)
device_remove_file(dev, mce_device_attrs[i]);
for (i = 0; i < banks; i++)
for (i = 0; i < mca_cfg.banks; i++)
device_remove_file(dev, &mce_banks[i].attr);
device_unregister(dev);
@@ -2304,7 +2309,7 @@ static void __cpuinit mce_disable_cpu(void *h)
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2322,7 +2327,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
if (!(action & CPU_TASKS_FROZEN))
cmci_reenable();
for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2375,7 +2380,7 @@ static __init void mce_init_banks(void)
{
int i;
for (i = 0; i < banks; i++) {
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
struct device_attribute *a = &b->attr;
@@ -2426,7 +2431,7 @@ device_initcall_sync(mcheck_init_device);
*/
static int __init mcheck_disable(char *str)
{
mce_disabled = 1;
mca_cfg.disabled = true;
return 1;
}
__setup("nomce", mcheck_disable);

Fájl megtekintése

@@ -6,7 +6,7 @@
*
* Written by Jacob Shin - AMD, Inc.
*
* Support: borislav.petkov@amd.com
* Maintained by: Borislav Petkov <bp@alien8.de>
*
* April 2006
* - added support for AMD Family 0x10 processors

Fájl megtekintése

@@ -53,7 +53,7 @@ static int cmci_supported(int *banks)
{
u64 cap;
if (mce_cmci_disabled || mce_ignore_ce)
if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
return 0;
/*
@@ -200,7 +200,7 @@ static void cmci_discover(int banks)
continue;
}
if (!mce_bios_cmci_threshold) {
if (!mca_cfg.bios_cmci_threshold) {
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
val |= CMCI_THRESHOLD;
} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
@@ -227,7 +227,7 @@ static void cmci_discover(int banks)
* set the thresholds properly or does not work with
* this boot option. Note down now and report later.
*/
if (mce_bios_cmci_threshold && bios_zero_thresh &&
if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
bios_wrong_thresh = 1;
} else {
@@ -235,7 +235,7 @@ static void cmci_discover(int banks)
}
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (mce_bios_cmci_threshold && bios_wrong_thresh) {
if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
pr_info_once(
"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
pr_info_once(
@@ -285,34 +285,39 @@ void cmci_clear(void)
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
/*
 * Work callback: re-run CMCI bank discovery on the CPU executing it.
 * @arg is unused. Always returns 0.
 */
static long cmci_rediscover_work_func(void *arg)
{
	int nr_banks;

	/* Recheck banks in case CPUs don't all have the same */
	if (!cmci_supported(&nr_banks))
		return 0;

	cmci_discover(nr_banks);
	return 0;
}
/*
* After a CPU went down, cycle through all the others and rediscover.
* Must run in process context.
*/
void cmci_rediscover(int dying)
{
int banks;
int cpu;
cpumask_var_t old;
int cpu, banks;
if (!cmci_supported(&banks))
return;
if (!alloc_cpumask_var(&old, GFP_KERNEL))
return;
cpumask_copy(old, &current->cpus_allowed);
for_each_online_cpu(cpu) {
if (cpu == dying)
continue;
if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
continue;
/* Recheck banks in case CPUs don't all have the same */
if (cmci_supported(&banks))
cmci_discover(banks);
}
set_cpus_allowed_ptr(current, old);
free_cpumask_var(old);
if (cpu == smp_processor_id()) {
cmci_rediscover_work_func(NULL);
continue;
}
work_on_cpu(cpu, cmci_rediscover_work_func, NULL);
}
}
/*

Fájl megtekintése

@@ -695,11 +695,16 @@ void mtrr_ap_init(void)
}
/**
* Save current fixed-range MTRR state of the BSP
* Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
*/
void mtrr_save_state(void)
{
smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
int first_cpu;
get_online_cpus();
first_cpu = cpumask_first(cpu_online_mask);
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
put_online_cpus();
}
void set_mtrr_aps_delayed_init(void)

Fájl megtekintése

@@ -340,9 +340,6 @@ int x86_setup_perfctr(struct perf_event *event)
/* BTS is currently only allowed for user-mode. */
if (!attr->exclude_kernel)
return -EOPNOTSUPP;
if (!attr->exclude_guest)
return -EOPNOTSUPP;
}
hwc->config |= config;
@@ -385,9 +382,6 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip) {
int precise = 0;
if (!event->attr.exclude_guest)
return -EOPNOTSUPP;
/* Support for constant skid */
if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
precise++;
@@ -1316,6 +1310,121 @@ static struct attribute_group x86_pmu_format_group = {
.attrs = NULL,
};
/*
* sysfs attribute describing one generic hardware event: the device
* attribute itself plus the event id that is handed to
* x86_pmu.event_map() when the attribute is shown.
*/
struct perf_pmu_events_attr {
struct device_attribute attr;
/* event id; EVENT_ATTR() initializes it to a PERF_COUNT_HW_* value */
u64 id;
};
/*
 * Remove all undefined events (x86_pmu.event_map(id) == 0)
 * out of events_attr attributes.
 *
 * x86_pmu.event_map() is indexed by event id. This assumes, as the
 * original code did, that @attrs initially lists the events in increasing
 * id order starting at 0. Once an entry has been removed and the tail
 * shifted left, the array position no longer equals the event id, so the
 * id is tracked separately from the position. Querying event_map() with
 * the position would re-test the same undefined id and drop every event
 * that follows the first undefined one.
 *
 * @attrs: NULL-terminated attribute array, compacted in place.
 */
static void __init filter_events(struct attribute **attrs)
{
	int i = 0, j, id;

	for (id = 0; attrs[i]; id++) {
		if (x86_pmu.event_map(id)) {
			/* Defined event: keep it and move to the next slot. */
			i++;
			continue;
		}

		/* Undefined event: close the gap, NULL terminator included. */
		for (j = i; attrs[j]; j++)
			attrs[j] = attrs[j + 1];

		/* Slot i now holds the next event; recheck it with id + 1. */
	}
}
/*
 * sysfs show callback for the generic event attributes: map the
 * attribute's event id to its hardware config and let the PMU-specific
 * events_sysfs_show() hook format it into @page.
 */
static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
				 char *page)
{
	struct perf_pmu_events_attr *pmu_attr;
	u64 config;

	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
	config = x86_pmu.event_map(pmu_attr->id);

	return x86_pmu.events_sysfs_show(page, config);
}
/*
* Helpers for declaring the generic event attributes:
*
* EVENT_VAR(_id)          - name of the perf_pmu_events_attr variable
* EVENT_PTR(_id)          - address of the struct attribute embedded in it
* EVENT_ATTR(_name, _id)  - define that variable as a read-only (0444)
*                           attribute called _name, shown through
*                           events_sysfs_show(), with id PERF_COUNT_HW_<_id>
*/
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
#define EVENT_ATTR(_name, _id) \
static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
.id = PERF_COUNT_HW_##_id, \
};
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
EVENT_ATTR(cache-references, CACHE_REFERENCES );
EVENT_ATTR(cache-misses, CACHE_MISSES );
EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS );
EVENT_ATTR(branch-misses, BRANCH_MISSES );
EVENT_ATTR(bus-cycles, BUS_CYCLES );
EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND );
EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND );
EVENT_ATTR(ref-cycles, REF_CPU_CYCLES );
static struct attribute *empty_attrs;
static struct attribute *events_attr[] = {
EVENT_PTR(CPU_CYCLES),
EVENT_PTR(INSTRUCTIONS),
EVENT_PTR(CACHE_REFERENCES),
EVENT_PTR(CACHE_MISSES),
EVENT_PTR(BRANCH_INSTRUCTIONS),
EVENT_PTR(BRANCH_MISSES),
EVENT_PTR(BUS_CYCLES),
EVENT_PTR(STALLED_CYCLES_FRONTEND),
EVENT_PTR(STALLED_CYCLES_BACKEND),
EVENT_PTR(REF_CPU_CYCLES),
NULL,
};
static struct attribute_group x86_pmu_events_group = {
.name = "events",
.attrs = events_attr,
};
/*
 * Format an event description ("event=0x..,umask=0x..,edge,...\n") into
 * @page from the raw event select value @config and the already extracted
 * event code @event. Fields that are zero/clear in @config are omitted.
 *
 * Returns the number of characters written (excluding the trailing NUL).
 */
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
{
	const u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
	const u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
	char *pos = page;

	/*
	 * We have whole page size to spend and just little data
	 * to write, so we can safely use sprintf.
	 */
	pos += sprintf(pos, "event=0x%02llx", event);

	if (umask)
		pos += sprintf(pos, ",umask=0x%02llx", umask);

	if (config & ARCH_PERFMON_EVENTSEL_EDGE)
		pos += sprintf(pos, ",edge");

	if (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
		pos += sprintf(pos, ",pc");

	if (config & ARCH_PERFMON_EVENTSEL_ANY)
		pos += sprintf(pos, ",any");

	if (config & ARCH_PERFMON_EVENTSEL_INV)
		pos += sprintf(pos, ",inv");

	if (cmask)
		pos += sprintf(pos, ",cmask=0x%02llx", cmask);

	pos += sprintf(pos, "\n");

	return pos - page;
}
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
@@ -1362,6 +1471,11 @@ static int __init init_hw_perf_events(void)
x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
if (!x86_pmu.events_sysfs_show)
x86_pmu_events_group.attrs = &empty_attrs;
else
filter_events(x86_pmu_events_group.attrs);
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1651,6 +1765,7 @@ static struct attribute_group x86_pmu_attr_group = {
static const struct attribute_group *x86_pmu_attr_groups[] = {
&x86_pmu_attr_group,
&x86_pmu_format_group,
&x86_pmu_events_group,
NULL,
};

Fájl megtekintése

@@ -354,6 +354,8 @@ struct x86_pmu {
int attr_rdpmc;
struct attribute **format_attrs;
ssize_t (*events_sysfs_show)(char *page, u64 config);
/*
* CPU Hotplug hooks
*/
@@ -536,6 +538,9 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
regs->ip = ip;
}
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
ssize_t intel_event_sysfs_show(char *page, u64 config);
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);

Fájl megtekintése

@@ -568,6 +568,14 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
}
}
/*
 * AMD events_sysfs_show hook: build the event code from @config and hand
 * it to the common formatter.
 */
static ssize_t amd_event_sysfs_show(char *page, u64 config)
{
	u64 event;

	/* Bits selected by ARCH_PERFMON_EVENTSEL_EVENT, taken in place ... */
	event = config & ARCH_PERFMON_EVENTSEL_EVENT;
	/* ... OR-ed with the AMD64_EVENTSEL_EVENT bits moved down by 24. */
	event |= (config & AMD64_EVENTSEL_EVENT) >> 24;

	return x86_event_sysfs_show(page, config, event);
}
static __initconst const struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = x86_pmu_handle_irq,
@@ -591,6 +599,7 @@ static __initconst const struct x86_pmu amd_pmu = {
.put_event_constraints = amd_put_event_constraints,
.format_attrs = amd_format_attr,
.events_sysfs_show = amd_event_sysfs_show,
.cpu_prepare = amd_pmu_cpu_prepare,
.cpu_starting = amd_pmu_cpu_starting,

Fájl megtekintése

@@ -1603,6 +1603,13 @@ static struct attribute *intel_arch_formats_attr[] = {
NULL,
};
/*
 * Intel events_sysfs_show hook: the event code is the part of @config
 * selected by ARCH_PERFMON_EVENTSEL_EVENT; formatting is done by the
 * common helper.
 */
ssize_t intel_event_sysfs_show(char *page, u64 config)
{
	return x86_event_sysfs_show(page, config,
				    config & ARCH_PERFMON_EVENTSEL_EVENT);
}
static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
@@ -1628,6 +1635,7 @@ static __initconst const struct x86_pmu core_pmu = {
.event_constraints = intel_core_event_constraints,
.guest_get_msrs = core_guest_get_msrs,
.format_attrs = intel_arch_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
};
struct intel_shared_regs *allocate_shared_regs(int cpu)
@@ -1766,6 +1774,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.pebs_aliases = intel_pebs_aliases_core2,
.format_attrs = intel_arch3_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,

Fájl megtekintése

@@ -2500,7 +2500,7 @@ static bool pcidrv_registered;
/*
* add a pci uncore device
*/
static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
{
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
@@ -2571,8 +2571,8 @@ static void uncore_pci_remove(struct pci_dev *pdev)
kfree(box);
}
static int __devinit uncore_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
static int uncore_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct intel_uncore_type *type;

Fájl megtekintése

@@ -227,6 +227,8 @@ static __initconst const struct x86_pmu p6_pmu = {
.event_constraints = p6_event_constraints,
.format_attrs = intel_p6_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
};
__init int p6_pmu_init(void)

Fájl megtekintése

@@ -26,11 +26,6 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
#ifdef CONFIG_X86_32
static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
{
/*
* We use exception 16 if we have hardware math and we've either seen
* it or the CPU claims it is internal
*/
int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
seq_printf(m,
"fdiv_bug\t: %s\n"
"hlt_bug\t\t: %s\n"
@@ -45,7 +40,7 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
c->f00f_bug ? "yes" : "no",
c->coma_bug ? "yes" : "no",
c->hard_math ? "yes" : "no",
fpu_exception ? "yes" : "no",
c->hard_math ? "yes" : "no",
c->cpuid_level,
c->wp_works_ok ? "yes" : "no");
}

Fájl megtekintése

@@ -16,6 +16,7 @@
#include <linux/delay.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/module.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -30,6 +31,27 @@
int in_crash_kexec;
/*
* Callback used to VMCLEAR all VMCSs loaded on the
* processor. The function pointer is assigned when the
* kvm_intel module is loaded.
*
* Protected by RCU.
*/
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
static inline void cpu_crash_vmclear_loaded_vmcss(void)
{
crash_vmclear_fn *do_vmclear_operation = NULL;
rcu_read_lock();
do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
if (do_vmclear_operation)
do_vmclear_operation();
rcu_read_unlock();
}
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
#endif
crash_save_cpu(regs, cpu);
/*
* VMCLEAR VMCSs loaded on all cpus if needed.
*/
cpu_crash_vmclear_loaded_vmcss();
/* Disable VMX or SVM if needed.
*
* We need to disable virtualization on all CPUs.
@@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
kdump_nmi_shootdown_cpus();
/*
* VMCLEAR VMCSs loaded on this cpu if needed.
*/
cpu_crash_vmclear_loaded_vmcss();
/* Booting kdump kernel with VMX or SVM enabled won't work,
* because (among other limitations) we can't disable paging
* with the virt flags.

Fájl megtekintése

@@ -739,30 +739,11 @@ ENTRY(ptregs_##name) ; \
ENDPROC(ptregs_##name)
PTREGSCALL1(iopl)
PTREGSCALL0(fork)
PTREGSCALL0(vfork)
PTREGSCALL2(sigaltstack)
PTREGSCALL0(sigreturn)
PTREGSCALL0(rt_sigreturn)
PTREGSCALL2(vm86)
PTREGSCALL1(vm86old)
/* Clone is an oddball. The 4th arg is in %edi */
ENTRY(ptregs_clone)
CFI_STARTPROC
leal 4(%esp),%eax
pushl_cfi %eax
pushl_cfi PT_EDI(%eax)
movl PT_EDX(%eax),%ecx
movl PT_ECX(%eax),%edx
movl PT_EBX(%eax),%eax
call sys_clone
addl $8,%esp
CFI_ADJUST_CFA_OFFSET -8
ret
CFI_ENDPROC
ENDPROC(ptregs_clone)
.macro FIXUP_ESPFIX_STACK
/*
* Switch back for ESPFIX stack to the normal zerobased stack
@@ -1084,7 +1065,6 @@ ENTRY(xen_failsafe_callback)
lea 16(%esp),%esp
CFI_ADJUST_CFA_OFFSET -16
jz 5f
addl $16,%esp
jmp iret_exc
5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL

Fájl megtekintése

@@ -56,7 +56,7 @@
#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/asm.h>
#include <asm/rcu.h>
#include <asm/context_tracking.h>
#include <asm/smap.h>
#include <linux/err.h>
@@ -845,10 +845,25 @@ ENTRY(\label)
END(\label)
.endm
PTREGSCALL stub_clone, sys_clone, %r8
PTREGSCALL stub_fork, sys_fork, %rdi
PTREGSCALL stub_vfork, sys_vfork, %rdi
PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
.macro FORK_LIKE func
ENTRY(stub_\func)
CFI_STARTPROC
popq %r11 /* save return address */
PARTIAL_FRAME 0
SAVE_REST
pushq %r11 /* put it back on stack */
FIXUP_TOP_OF_STACK %r11, 8
DEFAULT_FRAME 0 8 /* offset 8: return address */
call sys_\func
RESTORE_TOP_OF_STACK %r11, 8
ret $REST_SKIP /* pop extended registers */
CFI_ENDPROC
END(stub_\func)
.endm
FORK_LIKE clone
FORK_LIKE fork
FORK_LIKE vfork
PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
@@ -897,8 +912,6 @@ ENTRY(stub_rt_sigreturn)
END(stub_rt_sigreturn)
#ifdef CONFIG_X86_X32_ABI
PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx
ENTRY(stub_x32_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
@@ -995,8 +1008,8 @@ END(interrupt)
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
ASM_CLAC
XCPT_FRAME
ASM_CLAC
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
/* 0(%rsp): old_rsp-ARGOFFSET */
@@ -1135,8 +1148,8 @@ END(common_interrupt)
*/
.macro apicinterrupt num sym do_sym
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
ASM_CLAC
pushq_cfi $~(\num)
.Lcommon_\sym:
interrupt \do_sym
@@ -1190,8 +1203,8 @@ apicinterrupt IRQ_WORK_VECTOR \
*/
.macro zeroentry sym do_sym
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
@@ -1208,8 +1221,8 @@ END(\sym)
.macro paranoidzeroentry sym do_sym
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
@@ -1227,8 +1240,8 @@ END(\sym)
#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
.macro paranoidzeroentry_ist sym do_sym ist
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
@@ -1247,8 +1260,8 @@ END(\sym)
.macro errorentry sym do_sym
ENTRY(\sym)
ASM_CLAC
XCPT_FRAME
ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@@ -1266,8 +1279,8 @@ END(\sym)
/* error code is on the stack already */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
ASM_CLAC
XCPT_FRAME
ASM_CLAC
PARAVIRT_ADJUST_EXCEPTION_FRAME
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@@ -1699,9 +1712,10 @@ nested_nmi:
1:
/* Set up the interrupted NMIs stack to jump to repeat_nmi */
leaq -6*8(%rsp), %rdx
leaq -1*8(%rsp), %rdx
movq %rdx, %rsp
CFI_ADJUST_CFA_OFFSET 6*8
CFI_ADJUST_CFA_OFFSET 1*8
leaq -10*8(%rsp), %rdx
pushq_cfi $__KERNEL_DS
pushq_cfi %rdx
pushfq_cfi
@@ -1709,8 +1723,8 @@ nested_nmi:
pushq_cfi $repeat_nmi
/* Put stack back */
addq $(11*8), %rsp
CFI_ADJUST_CFA_OFFSET -11*8
addq $(6*8), %rsp
CFI_ADJUST_CFA_OFFSET -6*8
nested_nmi_out:
popq_cfi %rdx
@@ -1736,18 +1750,18 @@ first_nmi:
* +-------------------------+
* | NMI executing variable |
* +-------------------------+
* | Saved SS |
* | Saved Return RSP |
* | Saved RFLAGS |
* | Saved CS |
* | Saved RIP |
* +-------------------------+
* | copied SS |
* | copied Return RSP |
* | copied RFLAGS |
* | copied CS |
* | copied RIP |
* +-------------------------+
* | Saved SS |
* | Saved Return RSP |
* | Saved RFLAGS |
* | Saved CS |
* | Saved RIP |
* +-------------------------+
* | pt_regs |
* +-------------------------+
*
@@ -1763,9 +1777,14 @@ first_nmi:
/* Set the NMI executing variable on the stack. */
pushq_cfi $1
/*
* Leave room for the "copied" frame
*/
subq $(5*8), %rsp
/* Copy the stack frame to the Saved frame */
.rept 5
pushq_cfi 6*8(%rsp)
pushq_cfi 11*8(%rsp)
.endr
CFI_DEF_CFA_OFFSET SS+8-RIP
@@ -1786,12 +1805,15 @@ repeat_nmi:
* is benign for the non-repeat case, where 1 was pushed just above
* to this very stack slot).
*/
movq $1, 5*8(%rsp)
movq $1, 10*8(%rsp)
/* Make another copy, this one may be modified by nested NMIs */
addq $(10*8), %rsp
CFI_ADJUST_CFA_OFFSET -10*8
.rept 5
pushq_cfi 4*8(%rsp)
pushq_cfi -6*8(%rsp)
.endr
subq $(5*8), %rsp
CFI_DEF_CFA_OFFSET SS+8-RIP
end_repeat_nmi:
@@ -1842,8 +1864,12 @@ nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
RESTORE_ALL 8
/* Pop the extra iret frame */
addq $(5*8), %rsp
/* Clear the NMI executing stack variable */
movq $0, 10*8(%rsp)
movq $0, 5*8(%rsp)
jmp irq_return
CFI_ENDPROC
END(nmi)

Fájl megtekintése

@@ -266,6 +266,19 @@ num_subarch_entries = (. - subarch_entries) / 4
jmp default_entry
#endif /* CONFIG_PARAVIRT */
#ifdef CONFIG_HOTPLUG_CPU
/*
* Boot CPU0 entry point. It's called from play_dead(). Everything has been set
* up already except stack. We just set up stack here. Then call
* start_secondary().
*/
ENTRY(start_cpu0)
movl stack_start, %ecx
movl %ecx, %esp
jmp *(initial_code)
ENDPROC(start_cpu0)
#endif
/*
* Non-boot CPU entry point; entered from trampoline.S
* We can't lgdt here, because lgdt itself uses a data segment, but
@@ -292,8 +305,8 @@ default_entry:
* be using the global pages.
*
* NOTE! If we are on a 486 we may have no cr4 at all!
* Specifically, cr4 exists if and only if CPUID exists,
* which in turn exists if and only if EFLAGS.ID exists.
* Specifically, cr4 exists if and only if CPUID exists
* and has flags other than the FPU flag set.
*/
movl $X86_EFLAGS_ID,%ecx
pushl %ecx
@@ -308,6 +321,11 @@ default_entry:
testl %ecx,%eax
jz 6f # No ID flag = no CPUID = no CR4
movl $1,%eax
cpuid
andl $~1,%edx # Ignore CPUID.FPU
jz 6f # No flags or only CPUID.FPU = no CR4
movl pa(mmu_cr4_features),%eax
movl %eax,%cr4

Fájl megtekintése

@@ -252,6 +252,22 @@ ENTRY(secondary_startup_64)
pushq %rax # target address in negative space
lretq
#ifdef CONFIG_HOTPLUG_CPU
/*
* Boot CPU0 entry point. It's called from play_dead(). Everything has been set
* up already except stack. We just set up stack here. Then call
* start_secondary().
*/
ENTRY(start_cpu0)
movq stack_start(%rip),%rsp
movq initial_code(%rip),%rax
pushq $0 # fake return address to stop unwinder
pushq $__KERNEL_CS # set correct cs
pushq %rax # target address in negative space
lretq
ENDPROC(start_cpu0)
#endif
/* SMP bootup changes these two */
__REFDATA
.align 8

Fájl megtekintése

@@ -434,7 +434,7 @@ void hpet_msi_unmask(struct irq_data *data)
/* unmask it */
cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
cfg |= HPET_TN_FSB;
cfg |= HPET_TN_ENABLE | HPET_TN_FSB;
hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
}
@@ -445,7 +445,7 @@ void hpet_msi_mask(struct irq_data *data)
/* mask it */
cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
cfg &= ~HPET_TN_FSB;
cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB);
hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
}

Fájl megtekintése

@@ -175,7 +175,11 @@ void __cpuinit fpu_init(void)
cr0 |= X86_CR0_EM;
write_cr0(cr0);
if (!smp_processor_id())
/*
* init_thread_xstate is only called once to avoid overriding
* xstate_size during boot time or during CPU hotplug.
*/
if (xstate_size == 0)
init_thread_xstate();
mxcsr_feature_mask_init();

Fájl megtekintése

@@ -42,39 +42,6 @@
* (these are usually mapped into the 0x30-0xff vector range)
*/
#ifdef CONFIG_X86_32
/*
* Note that on a 486, we don't want to do a SIGFPE on an irq13
* as the irq is unreliable, and exception 16 works correctly
* (ie as explained in the intel literature). On a 386, you
* can't use exception 16 due to bad IBM design, so we have to
* rely on the less exact irq13.
*
* Careful.. Not only is IRQ13 unreliable, but it also
* leads to races. IBM designers who came up with it should
* be shot.
*/
static irqreturn_t math_error_irq(int cpl, void *dev_id)
{
outb(0, 0xF0);
if (ignore_fpu_irq || !boot_cpu_data.hard_math)
return IRQ_NONE;
math_error(get_irq_regs(), 0, X86_TRAP_MF);
return IRQ_HANDLED;
}
/*
* New motherboards sometimes make IRQ 13 be a PCI interrupt,
* so allow interrupt sharing.
*/
static struct irqaction fpu_irq = {
.handler = math_error_irq,
.name = "fpu",
.flags = IRQF_NO_THREAD,
};
#endif
/*
* IRQ2 is cascade interrupt to second interrupt controller
*/
@@ -242,13 +209,6 @@ void __init native_init_IRQ(void)
setup_irq(2, &irq2);
#ifdef CONFIG_X86_32
/*
* External FPU? Set up irq13 if so, for
* original braindamaged IBM FERR coupling.
*/
if (boot_cpu_data.hard_math && !cpu_has_fpu)
setup_irq(FPU_IRQ, &fpu_irq);
irq_ctx_init(smp_processor_id());
#endif
}

Fájl megtekintése

@@ -42,6 +42,8 @@
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/kvm_guest.h>
#include <asm/context_tracking.h>
static int kvmapf = 1;
@@ -62,6 +64,15 @@ static int parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
/* Non-zero (the default) while the kvmclock vsyscall setup is wanted. */
static int kvmclock_vsyscall = 1;

/*
 * Boot parameter handler: turn the kvmclock vsyscall setup off.
 * @arg is ignored. Always returns 0.
 */
static int parse_no_kvmclock_vsyscall(char *arg)
{
	kvmclock_vsyscall = 0;

	return 0;
}
early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
@@ -110,11 +121,8 @@ void kvm_async_pf_task_wait(u32 token)
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
struct kvm_task_sleep_node n, *e;
DEFINE_WAIT(wait);
int cpu, idle;
cpu = get_cpu();
idle = idle_cpu(cpu);
put_cpu();
rcu_irq_enter();
spin_lock(&b->lock);
e = _find_apf_task(b, token);
@@ -123,12 +131,14 @@ void kvm_async_pf_task_wait(u32 token)
hlist_del(&e->link);
kfree(e);
spin_unlock(&b->lock);
rcu_irq_exit();
return;
}
n.token = token;
n.cpu = smp_processor_id();
n.halted = idle || preempt_count() > 1;
n.halted = is_idle_task(current) || preempt_count() > 1;
init_waitqueue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
spin_unlock(&b->lock);
@@ -147,13 +157,16 @@ void kvm_async_pf_task_wait(u32 token)
/*
* We cannot reschedule. So halt.
*/
rcu_irq_exit();
native_safe_halt();
rcu_irq_enter();
local_irq_disable();
}
}
if (!n.halted)
finish_wait(&n.wq, &wait);
rcu_irq_exit();
return;
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
@@ -247,10 +260,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
rcu_irq_enter();
exception_enter(regs);
exit_idle();
kvm_async_pf_task_wait((u32)read_cr2());
rcu_irq_exit();
exception_exit(regs);
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
@@ -471,6 +484,9 @@ void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
if (kvmclock_vsyscall)
kvm_setup_vsyscall_timeinfo();
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
register_cpu_notifier(&kvm_cpu_notifier);

Fájl megtekintése

@@ -23,6 +23,7 @@
#include <asm/apic.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/memblock.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
@@ -39,7 +40,7 @@ static int parse_no_kvmclock(char *arg)
early_param("no-kvmclock", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
static struct pvclock_vsyscall_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock;
/*
@@ -52,15 +53,20 @@ static unsigned long kvm_get_wallclock(void)
struct pvclock_vcpu_time_info *vcpu_time;
struct timespec ts;
int low, high;
int cpu;
low = (int)__pa_symbol(&wall_clock);
high = ((u64)__pa_symbol(&wall_clock) >> 32);
native_write_msr(msr_kvm_wall_clock, low, high);
vcpu_time = &get_cpu_var(hv_clock);
preempt_disable();
cpu = smp_processor_id();
vcpu_time = &hv_clock[cpu].pvti;
pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
put_cpu_var(hv_clock);
preempt_enable();
return ts.tv_sec;
}
@@ -74,9 +80,11 @@ static cycle_t kvm_clock_read(void)
{
struct pvclock_vcpu_time_info *src;
cycle_t ret;
int cpu;
preempt_disable_notrace();
src = &__get_cpu_var(hv_clock);
cpu = smp_processor_id();
src = &hv_clock[cpu].pvti;
ret = pvclock_clocksource_read(src);
preempt_enable_notrace();
return ret;
@@ -99,8 +107,15 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
static unsigned long kvm_get_tsc_khz(void)
{
struct pvclock_vcpu_time_info *src;
src = &per_cpu(hv_clock, 0);
return pvclock_tsc_khz(src);
int cpu;
unsigned long tsc_khz;
preempt_disable();
cpu = smp_processor_id();
src = &hv_clock[cpu].pvti;
tsc_khz = pvclock_tsc_khz(src);
preempt_enable();
return tsc_khz;
}
static void kvm_get_preset_lpj(void)
@@ -119,10 +134,14 @@ bool kvm_check_and_clear_guest_paused(void)
{
bool ret = false;
struct pvclock_vcpu_time_info *src;
int cpu = smp_processor_id();
src = &__get_cpu_var(hv_clock);
if (!hv_clock)
return ret;
src = &hv_clock[cpu].pvti;
if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
__this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
src->flags &= ~PVCLOCK_GUEST_STOPPED;
ret = true;
}
@@ -141,9 +160,10 @@ int kvm_register_clock(char *txt)
{
int cpu = smp_processor_id();
int low, high, ret;
struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
low = (int)__pa(src) | 1;
high = ((u64)__pa(src) >> 32);
ret = native_write_msr_safe(msr_kvm_system_time, low, high);
printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
cpu, high, low, txt);
@@ -197,6 +217,8 @@ static void kvm_shutdown(void)
void __init kvmclock_init(void)
{
unsigned long mem;
if (!kvm_para_available())
return;
@@ -209,8 +231,18 @@ void __init kvmclock_init(void)
printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
msr_kvm_system_time, msr_kvm_wall_clock);
if (kvm_register_clock("boot clock"))
mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS,
PAGE_SIZE);
if (!mem)
return;
hv_clock = __va(mem);
if (kvm_register_clock("boot clock")) {
hv_clock = NULL;
memblock_free(mem,
sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
return;
}
pv_time_ops.sched_clock = kvm_clock_read;
x86_platform.calibrate_tsc = kvm_get_tsc_khz;
x86_platform.get_wallclock = kvm_get_wallclock;
@@ -233,3 +265,37 @@ void __init kvmclock_init(void)
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
}
/*
 * Hand the hv_clock time info area to pvclock_init_vsyscall() and switch
 * the kvm clocksource to VCLOCK_PVCLOCK. Only does work on
 * CONFIG_X86_64; otherwise it just returns 0.
 *
 * Returns 0 on success or when there is nothing to set up, 1 when the
 * current CPU's time info does not have PVCLOCK_TSC_STABLE_BIT set, or
 * the non-zero value returned by pvclock_init_vsyscall().
 */
int __init kvm_setup_vsyscall_timeinfo(void)
{
#ifdef CONFIG_X86_64
	struct pvclock_vcpu_time_info *vcpu_time;
	unsigned int size;
	int cpu;
	int ret;
	u8 flags;

	/*
	 * hv_clock stays NULL when kvmclock was not set up (or its
	 * registration failed), so there is no time info to read or export.
	 * Bail out instead of dereferencing a NULL pointer.
	 */
	if (!hv_clock)
		return 0;

	size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS;

	/* Stay on one CPU while looking at that CPU's time info. */
	preempt_disable();
	cpu = smp_processor_id();

	vcpu_time = &hv_clock[cpu].pvti;
	flags = pvclock_read_flags(vcpu_time);

	if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
		preempt_enable();
		return 1;
	}

	ret = pvclock_init_vsyscall(hv_clock, size);
	preempt_enable();
	if (ret)
		return ret;

	kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
#endif
	return 0;
}

Fájl megtekintése

@@ -8,8 +8,8 @@
* Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
*
* Maintainers:
* Andreas Herrmann <andreas.herrmann3@amd.com>
* Borislav Petkov <borislav.petkov@amd.com>
* Andreas Herrmann <herrmann.der.user@googlemail.com>
* Borislav Petkov <bp@alien8.de>
*
* This driver allows upgrading the microcode on F10h and later
* AMD CPUs.
@@ -190,6 +190,7 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size,
#define F1XH_MPB_MAX_SIZE 2048
#define F14H_MPB_MAX_SIZE 1824
#define F15H_MPB_MAX_SIZE 4096
#define F16H_MPB_MAX_SIZE 3458
switch (c->x86) {
case 0x14:
@@ -198,6 +199,9 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size,
case 0x15:
max_size = F15H_MPB_MAX_SIZE;
break;
case 0x16:
max_size = F16H_MPB_MAX_SIZE;
break;
default:
max_size = F1XH_MPB_MAX_SIZE;
break;

Fájl megtekintése

@@ -265,7 +265,7 @@ rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
static __devinit void via_no_dac(struct pci_dev *dev)
static void via_no_dac(struct pci_dev *dev)
{
if (forbid_dac == 0) {
dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");

Fájl megtekintése

@@ -262,36 +262,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
propagate_user_return_notify(prev_p, next_p);
}
int sys_fork(struct pt_regs *regs)
{
return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}
/*
* This is trivial, and on the face of it looks like it
* could equally well be done in user mode.
*
* Not so, for quite unobvious reasons - register pressure.
* In user mode vfork() cannot have a stack frame, and if
* done by calling the "clone()" system call directly, you
* do not have enough call-clobbered registers to hold all
* the information you need.
*/
int sys_vfork(struct pt_regs *regs)
{
return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
NULL, NULL);
}
long
sys_clone(unsigned long clone_flags, unsigned long newsp,
void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
if (!newsp)
newsp = regs->sp;
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
/*
* Idle related variables and functions
*/
@@ -306,11 +276,6 @@ void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
#endif
static inline int hlt_use_halt(void)
{
return 1;
}
#ifndef CONFIG_SMP
static inline void play_dead(void)
{
@@ -410,28 +375,22 @@ void cpu_idle(void)
*/
void default_idle(void)
{
if (hlt_use_halt()) {
trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
trace_cpu_idle_rcuidle(1, smp_processor_id());
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
* test NEED_RESCHED:
*/
smp_mb();
trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
trace_cpu_idle_rcuidle(1, smp_processor_id());
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
* test NEED_RESCHED:
*/
smp_mb();
if (!need_resched())
safe_halt(); /* enables interrupts racelessly */
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
trace_power_end_rcuidle(smp_processor_id());
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
} else {
if (!need_resched())
safe_halt(); /* enables interrupts racelessly */
else
local_irq_enable();
/* loop is done by the caller */
cpu_relax();
}
current_thread_info()->status |= TS_POLLING;
trace_power_end_rcuidle(smp_processor_id());
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);

Fájl megtekintése

@@ -128,8 +128,7 @@ void release_thread(struct task_struct *dead_task)
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
unsigned long arg, struct task_struct *p)
{
struct pt_regs *childregs = task_pt_regs(p);
struct task_struct *tsk;
@@ -138,7 +137,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.sp = (unsigned long) childregs;
p->thread.sp0 = (unsigned long) (childregs+1);
if (unlikely(!regs)) {
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
p->thread.ip = (unsigned long) ret_from_kernel_thread;
@@ -156,12 +155,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
return 0;
}
*childregs = *regs;
*childregs = *current_pt_regs();
childregs->ax = 0;
childregs->sp = sp;
if (sp)
childregs->sp = sp;
p->thread.ip = (unsigned long) ret_from_fork;
task_user_gs(p) = get_user_gs(regs);
task_user_gs(p) = get_user_gs(current_pt_regs());
p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;

Fájl megtekintése

@@ -146,8 +146,7 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
unsigned long arg, struct task_struct *p)
{
int err;
struct pt_regs *childregs;
@@ -169,7 +168,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
savesegment(ds, p->thread.ds);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(!regs)) {
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->sp = (unsigned long)childregs;
@@ -181,10 +180,11 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
return 0;
}
*childregs = *regs;
*childregs = *current_pt_regs();
childregs->ax = 0;
childregs->sp = sp;
if (sp)
childregs->sp = sp;
err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

Fájl megtekintése

@@ -22,6 +22,8 @@
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/rcupdate.h>
#include <linux/module.h>
#include <linux/context_tracking.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -166,6 +168,35 @@ static inline bool invalid_selector(u16 value)
#define FLAG_MASK FLAG_MASK_32
/*
 * On X86_32 a trap taken while the CPU is already in kernel mode does not
 * save ss and esp. The interrupted stack sits directly underneath the
 * saved registers, so the 'sp'/'ss' slots of pt_regs were never written
 * and the address of 'regs->sp' itself is the previous stack pointer.
 *
 * If the stack was empty, '&regs->sp' falls outside the current stack. In
 * that case the previous stack recorded in thread_info is used, and when
 * there is none either, regs is returned so that the result is never
 * NULL.
 *
 * Only valid for traps taken in kernel mode.
 */
unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
	unsigned long stack_base = (unsigned long)regs & ~(THREAD_SIZE - 1);
	unsigned long prev_sp = (unsigned long)&regs->sp;
	struct thread_info *ti = (struct thread_info *)stack_base;

	/* &regs->sp still lies within the same stack as regs */
	if ((prev_sp & ~(THREAD_SIZE - 1)) == stack_base)
		return prev_sp;

	if (!ti->previous_esp)
		return (unsigned long)regs;

	return ti->previous_esp;
}
EXPORT_SYMBOL_GPL(kernel_stack_pointer);
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
@@ -1461,7 +1492,7 @@ long syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;
rcu_user_exit();
user_exit();
/*
* If we stepped into a sysenter/syscall insn, it trapped in
@@ -1511,6 +1542,13 @@ void syscall_trace_leave(struct pt_regs *regs)
{
bool step;
/*
* We may come here right after calling schedule_user()
* or do_notify_resume(), in which case we can be in RCU
* user mode.
*/
user_exit();
audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
@@ -1527,5 +1565,5 @@ void syscall_trace_leave(struct pt_regs *regs)
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);
rcu_user_enter();
user_enter();
}

Fájl megtekintése

@@ -17,23 +17,13 @@
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/bootmem.h>
#include <asm/fixmap.h>
#include <asm/pvclock.h>
/*
* These are perodically updated
* xen: magic shared_info page
* kvm: gpa registered via msr
* and then copied here.
*/
struct pvclock_shadow_time {
u64 tsc_timestamp; /* TSC at last update of time vals. */
u64 system_timestamp; /* Time, in nanosecs, since boot. */
u32 tsc_to_nsec_mul;
int tsc_shift;
u32 version;
u8 flags;
};
static u8 valid_flags __read_mostly = 0;
void pvclock_set_flags(u8 flags)
@@ -41,34 +31,6 @@ void pvclock_set_flags(u8 flags)
valid_flags = flags;
}
static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
{
u64 delta = native_read_tsc() - shadow->tsc_timestamp;
return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
shadow->tsc_shift);
}
/*
* Reads a consistent set of time-base values from hypervisor,
* into a shadow data area.
*/
static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
struct pvclock_vcpu_time_info *src)
{
do {
dst->version = src->version;
rmb(); /* fetch version before data */
dst->tsc_timestamp = src->tsc_timestamp;
dst->system_timestamp = src->system_time;
dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
dst->tsc_shift = src->tsc_shift;
dst->flags = src->flags;
rmb(); /* test version after fetching data */
} while ((src->version & 1) || (dst->version != src->version));
return dst->version;
}
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
{
u64 pv_tsc_khz = 1000000ULL << 32;
@@ -88,23 +50,32 @@ void pvclock_resume(void)
atomic64_set(&last_value, 0);
}
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
{
struct pvclock_shadow_time shadow;
unsigned version;
cycle_t ret, offset;
u64 last;
cycle_t ret;
u8 flags;
do {
version = pvclock_get_time_values(&shadow, src);
barrier();
offset = pvclock_get_nsec_offset(&shadow);
ret = shadow.system_timestamp + offset;
barrier();
} while (version != src->version);
version = __pvclock_read_cycles(src, &ret, &flags);
} while ((src->version & 1) || version != src->version);
return flags & valid_flags;
}
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
unsigned version;
cycle_t ret;
u64 last;
u8 flags;
do {
version = __pvclock_read_cycles(src, &ret, &flags);
} while ((src->version & 1) || version != src->version);
if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
(shadow.flags & PVCLOCK_TSC_STABLE_BIT))
(flags & PVCLOCK_TSC_STABLE_BIT))
return ret;
/*
@@ -156,3 +127,71 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
/*
 * Return the vsyscall time info slot of @cpu from pvclock_vdso_info.
 *
 * Calling this before pvclock_vdso_info has been set is treated as a bug:
 * BUG() is raised, with a NULL return behind it as a fallback.
 */
static struct pvclock_vsyscall_time_info *
pvclock_get_vsyscall_user_time_info(int cpu)
{
	if (pvclock_vdso_info)
		return &pvclock_vdso_info[cpu];

	BUG();
	return NULL;
}
struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
{
return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
}
#ifdef CONFIG_X86_64
/*
 * Task migration notifier callback: bump migrate_count in the vsyscall
 * time info of the CPU the task is leaving (mn->from_cpu).
 *
 * @v is the struct task_migration_notifier describing the migration;
 * @nb and @l are unused. Always returns NOTIFY_DONE.
 */
static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
				void *v)
{
	struct task_migration_notifier *mn = v;
	struct pvclock_vsyscall_time_info *src_info =
		pvclock_get_vsyscall_user_time_info(mn->from_cpu);

	/*
	 * Defensive check for "pvclock vsyscall not initialized"; note that
	 * the lookup helper BUG()s before it would return NULL.
	 */
	if (unlikely(src_info == NULL))
		return NOTIFY_DONE;

	src_info->migrate_count++;

	return NOTIFY_DONE;
}

/* Notifier block that routes task migration events to the callback above. */
static struct notifier_block pvclock_migrate = {
	.notifier_call = pvclock_task_migrate,
};
/*
 * Initialize the generic pvclock vsyscall state: remember the
 * caller-provided per-vcpu pvclock information area and set up fixmap
 * mappings for its page(s). Nothing is allocated here.
 *
 * @i:    start of the pvclock_vsyscall_time_info array to expose
 * @size: size of that array in bytes; a warning is emitted when it is not
 *        exactly PVCLOCK_VSYSCALL_NR_PAGES pages
 *
 * Also registers the task migration notifier. Always returns 0.
 */
int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
				 int size)
{
	int idx;

	WARN_ON(size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);

	pvclock_vdso_info = i;

	/*
	 * @i is an ordinary kernel virtual address (the kvmclock caller
	 * passes the __va() of memblock memory), not a kernel-image symbol,
	 * so it is translated with __pa() rather than __pa_symbol().
	 */
	for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
		__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
			     __pa(i) + (idx*PAGE_SIZE),
			     PAGE_KERNEL_VVAR);
	}

	register_task_migration_notifier(&pvclock_migrate);

	return 0;
}
#endif

Fájl megtekintése

@@ -8,7 +8,7 @@
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
static void quirk_intel_irqbalance(struct pci_dev *dev)
{
u8 config;
u16 word;
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
/* Set correct numa_node information for AMD NB functions */
static void __devinit quirk_amd_nb_node(struct pci_dev *dev)
static void quirk_amd_nb_node(struct pci_dev *dev)
{
struct pci_dev *nb_ht;
unsigned int devfn;

Fájl megtekintése

@@ -195,12 +195,6 @@ void read_persistent_clock(struct timespec *ts)
ts->tv_nsec = 0;
}
unsigned long long native_read_tsc(void)
{
return __native_read_tsc();
}
EXPORT_SYMBOL(native_read_tsc);
static struct resource rtc_resources[] = {
[0] = {

Fájl megtekintése

@@ -143,11 +143,7 @@ int default_check_phys_apicid_present(int phys_apicid)
}
#endif
#ifndef CONFIG_DEBUG_BOOT_PARAMS
struct boot_params __initdata boot_params;
#else
struct boot_params boot_params;
#endif
/*
* Machine setup..
@@ -614,6 +610,83 @@ static __init void reserve_ibft_region(void)
static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
/*
 * Report whether the device at PCI 0:2.0 is one of the graphics devices
 * listed in snb_ids[] (vendor 0x8086), i.e. whether the memory workaround
 * in trim_snb_memory() is needed.
 *
 * Returns false without CONFIG_PCI, when early PCI config access is not
 * allowed, or when vendor/device do not match.
 */
static bool __init snb_gfx_workaround_needed(void)
{
#ifdef CONFIG_PCI
	static const __initconst u16 snb_ids[] = {
		0x0102,
		0x0112,
		0x0122,
		0x0106,
		0x0116,
		0x0126,
		0x010a,
	};
	u16 vendor_id, device_id;
	int idx;

	/* Assume no if something weird is going on with PCI */
	if (!early_pci_allowed())
		return false;

	vendor_id = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID);
	if (vendor_id != 0x8086)
		return false;

	/* the device ID is only read once the vendor has matched */
	device_id = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID);
	for (idx = 0; idx < ARRAY_SIZE(snb_ids); idx++) {
		if (snb_ids[idx] == device_id)
			return true;
	}
#endif
	return false;
}
/*
 * Sandy Bridge graphics has trouble with certain memory ranges; keep them
 * out of allocation by reserving them in memblock. Does nothing unless
 * snb_gfx_workaround_needed() reports an affected device.
 */
static void __init trim_snb_memory(void)
{
	static const __initconst unsigned long bad_pages[] = {
		0x20050000,
		0x20110000,
		0x20130000,
		0x20138000,
		0x40004000,
	};
	unsigned long page;
	int idx;

	if (!snb_gfx_workaround_needed())
		return;

	printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n");

	/*
	 * Reserve all memory below the 1 MB mark that has not
	 * already been reserved.
	 */
	memblock_reserve(0, 1<<20);

	/* Reserve each listed page; a failure is only logged. */
	for (idx = 0; idx < ARRAY_SIZE(bad_pages); idx++) {
		page = bad_pages[idx];
		if (!memblock_reserve(page, PAGE_SIZE))
			continue;
		printk(KERN_WARNING "failed to reserve 0x%08lx\n", page);
	}
}
/*
 * Platform-specific memory range workarounds live here, i.e. memory that
 * is known to be corrupt or that otherwise needs to be reserved on
 * specific platforms. Currently the only one is the Sandy Bridge graphics
 * workaround in trim_snb_memory().
 *
 * If this gets used more widely it could use a real dispatch mechanism.
 */
static void __init trim_platform_memory_ranges(void)
{
trim_snb_memory();
}
static void __init trim_bios_range(void)
{
/*
@@ -634,6 +707,7 @@ static void __init trim_bios_range(void)
* take them out.
*/
e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
@@ -912,6 +986,8 @@ void __init setup_arch(char **cmdline_p)
setup_real_mode();
trim_platform_memory_ranges();
init_gbpages();
/* max_pfn_mapped is updated here */
@@ -956,6 +1032,10 @@ void __init setup_arch(char **cmdline_p)
reserve_initrd();
#if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD)
acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
#endif
reserve_crashkernel();
vsmp_init();

Fájl megtekintése

@@ -22,6 +22,7 @@
#include <linux/uaccess.h>
#include <linux/user-return-notifier.h>
#include <linux/uprobes.h>
#include <linux/context_tracking.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
@@ -363,10 +364,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -413,7 +411,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct rt_sigframe __user *frame;
void __user *fp = NULL;
int err = 0;
struct task_struct *me = current;
frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp);
@@ -432,10 +429,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
@@ -502,10 +496,7 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
if (ka->sa.sa_flags & SA_RESTORER) {
@@ -602,13 +593,6 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
}
#endif /* CONFIG_X86_32 */
long
sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct pt_regs *regs)
{
return do_sigaltstack(uss, uoss, regs->sp);
}
/*
* Do a signal return; undo the signal stack.
*/
@@ -658,7 +642,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
return ax;
@@ -816,7 +800,7 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
rcu_user_exit();
user_exit();
#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
@@ -838,7 +822,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
rcu_user_enter();
user_enter();
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
@@ -864,7 +848,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
struct rt_sigframe_x32 __user *frame;
sigset_t set;
unsigned long ax;
struct pt_regs tregs;
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
@@ -878,8 +861,7 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
tregs = *regs;
if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
return ax;

Fájl megtekintése

@@ -68,6 +68,8 @@
#include <asm/mwait.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
@@ -125,8 +127,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
atomic_t init_deasserted;
/*
* Report back to the Boot Processor.
* Running on AP.
* Report back to the Boot Processor during boot time or to the caller processor
* during CPU online.
*/
static void __cpuinit smp_callin(void)
{
@@ -138,15 +140,17 @@ static void __cpuinit smp_callin(void)
* we may get here before an INIT-deassert IPI reaches
* our local APIC. We have to wait for the IPI or we'll
* lock up on an APIC access.
*
* Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI.
*/
if (apic->wait_for_init_deassert)
cpuid = smp_processor_id();
if (apic->wait_for_init_deassert && cpuid != 0)
apic->wait_for_init_deassert(&init_deasserted);
/*
* (This works even if the APIC is not enabled.)
*/
phys_id = read_apic_id();
cpuid = smp_processor_id();
if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
phys_id, cpuid);
@@ -228,6 +232,8 @@ static void __cpuinit smp_callin(void)
cpumask_set_cpu(cpuid, cpu_callin_mask);
}
static int cpu0_logical_apicid;
static int enable_start_cpu0;
/*
* Activate a secondary processor.
*/
@@ -243,6 +249,8 @@ notrace static void __cpuinit start_secondary(void *unused)
preempt_disable();
smp_callin();
enable_start_cpu0 = 0;
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
load_cr3(swapper_pg_dir);
@@ -279,19 +287,30 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}
/*
 * Copy boot_cpu_data into the cpu_data slot of CPU 0 and set that slot's
 * cpu_index to 0.
 */
void __init smp_store_boot_cpu_info(void)
{
	int boot_cpu = 0; /* CPU 0 */
	struct cpuinfo_x86 *info = &cpu_data(boot_cpu);

	*info = boot_cpu_data;
	info->cpu_index = boot_cpu;
}
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
*/
void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
*c = boot_cpu_data;
c->cpu_index = id;
if (id != 0)
identify_secondary_cpu(c);
/*
* During boot time, CPU0 has this setup already. Save the info when
* bringing up AP or offlined CPU0.
*/
identify_secondary_cpu(c);
}
static bool __cpuinit
@@ -313,7 +332,7 @@ do { \
static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (cpu_has_topoext) {
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
if (c->phys_proc_id == o->phys_proc_id &&
@@ -481,7 +500,7 @@ void __inquire_remote_apic(int apicid)
* won't ... remember to clear down the APIC, etc later.
*/
int __cpuinit
wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt;
@@ -489,7 +508,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
/* Target chip */
/* Boot on the stack */
/* Kick the second */
apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -649,6 +668,63 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
node, cpu, apicid);
}
/*
 * NMI handler used while waking up CPU0.
 *
 * Returns NMI_HANDLED only when running on CPU 0, that CPU is not online
 * and enable_start_cpu0 is set; in every other case returns NMI_DONE.
 * @cmd and @regs are not used.
 */
static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
{
	int this_cpu = smp_processor_id();

	if (this_cpu != 0 || cpu_online(this_cpu) || !enable_start_cpu0)
		return NMI_DONE;

	return NMI_HANDLED;
}
/*
* Wake up AP by INIT, INIT, STARTUP sequence.
*
* Instead of waiting for STARTUP after INITs, BSP will execute the BIOS
* boot-strap code which is not a desired behavior for waking up BSP. To
* avoid the boot-strap code, wake up CPU0 by NMI instead.
*
* This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined
* (i.e. physically hot removed and then hot added), NMI won't wake it up.
* We'll change this code in the future to wake up hard offlined CPU0 if
* real platform and request are available.
*/
/*
 * Kick @cpu: non-zero CPUs go through wakeup_secondary_cpu_via_init(),
 * CPU 0 is woken with an NMI after the "wake_cpu0" NMI handler has been
 * registered.
 *
 * @cpu0_nmi_registered is set to 1 once that handler is registered, so
 * the caller knows it has to be unregistered. Returns the result of the
 * wakeup call, or the register_nmi_handler() error.
 */
static int __cpuinit
wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
			int *cpu0_nmi_registered)
{
	int target_id;
	int err;

	/*
	 * Wake up AP by INIT, INIT, STARTUP sequence.
	 */
	if (cpu)
		return wakeup_secondary_cpu_via_init(apicid, start_ip);

	/*
	 * Wake up BSP by nmi.
	 *
	 * Register a NMI handler to help wake up CPU0.
	 */
	err = register_nmi_handler(NMI_LOCAL,
				   wakeup_cpu0_nmi, 0, "wake_cpu0");
	if (err)
		return err;

	enable_start_cpu0 = 1;
	*cpu0_nmi_registered = 1;

	/* in logical destination mode the NMI targets CPU0's logical APIC ID */
	target_id = apicid;
	if (apic->dest_logical == APIC_DEST_LOGICAL)
		target_id = cpu0_logical_apicid;

	return wakeup_secondary_cpu_via_nmi(target_id, start_ip);
}
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -664,6 +740,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
unsigned long boot_error = 0;
int timeout;
int cpu0_nmi_registered = 0;
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
@@ -711,13 +788,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
}
/*
* Kick the secondary CPU. Use the method in the APIC driver
* if it's defined - or use an INIT boot APIC message otherwise:
* Wake up a CPU in different cases:
* - Use the method in the APIC driver if it's defined
* Otherwise,
* - Use an INIT boot APIC message for APs or NMI for BSP.
*/
if (apic->wakeup_secondary_cpu)
boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
else
boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
&cpu0_nmi_registered);
if (!boot_error) {
/*
@@ -782,6 +862,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
*/
smpboot_restore_warm_reset_vector();
}
/*
* Clean up the nmi handler. Do this after the callin and callout sync
* to avoid impact of possible long unregister time.
*/
if (cpu0_nmi_registered)
unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
return boot_error;
}
@@ -795,7 +882,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
if (apicid == BAD_APICID ||
!physid_isset(apicid, phys_cpu_present_map) ||
!apic->apic_id_valid(apicid)) {
pr_err("%s: bad cpu %d\n", __func__, cpu);
@@ -818,6 +905,9 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* the FPU context is blank, nobody can own it */
__cpu_disable_lazy_restore(cpu);
err = do_boot_cpu(apicid, cpu, tidle);
if (err) {
pr_debug("do_boot_cpu failed %d\n", err);
@@ -990,7 +1080,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
/*
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
smp_store_boot_cpu_info(); /* Final full version of the data */
cpumask_copy(cpu_callin_mask, cpumask_of(0));
mb();
@@ -1026,6 +1116,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
*/
setup_local_APIC();
if (x2apic_mode)
cpu0_logical_apicid = apic_read(APIC_LDR);
else
cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
/*
* Enable IO APIC before setting up error vector
*/
@@ -1214,19 +1309,6 @@ void cpu_disable_common(void)
int native_cpu_disable(void)
{
int cpu = smp_processor_id();
/*
* Perhaps use cpufreq to drop frequency, but that could go
* into generic code.
*
* We won't take down the boot processor on i386 due to some
* interrupts only being able to be serviced by the BSP.
* Especially so if we're not using an IOAPIC -zwane
*/
if (cpu == 0)
return -EBUSY;
clear_local_APIC();
cpu_disable_common();
@@ -1266,6 +1348,14 @@ void play_dead_common(void)
local_irq_disable();
}
/*
 * True when running on CPU 0 while enable_start_cpu0 is set, i.e. when a
 * wakeup of CPU0 has been requested.
 */
static bool wakeup_cpu0(void)
{
	return smp_processor_id() == 0 && enable_start_cpu0;
}
/*
* We need to flush the caches before going to sleep, lest we have
* dirty data in our caches when we come back up.
@@ -1329,6 +1419,11 @@ static inline void mwait_play_dead(void)
__monitor(mwait_ptr, 0, 0);
mb();
__mwait(eax, 0);
/*
* If NMI wants to wake up CPU0, start CPU0.
*/
if (wakeup_cpu0())
start_cpu0();
}
}
@@ -1339,6 +1434,11 @@ static inline void hlt_play_dead(void)
while (1) {
native_halt();
/*
* If NMI wants to wake up CPU0, start CPU0.
*/
if (wakeup_cpu0())
start_cpu0();
}
}

Fájl megtekintése

@@ -165,10 +165,11 @@ void set_task_blockstep(struct task_struct *task, bool on)
* Ensure irq/preemption can't change debugctl in between.
* Note also that both TIF_BLOCKSTEP and debugctl should
* be changed atomically wrt preemption.
* FIXME: this means that set/clear TIF_BLOCKSTEP is simply
* wrong if task != current, SIGKILL can wakeup the stopped
* tracee and set/clear can play with the running task, this
* can confuse the next __switch_to_xtra().
*
* NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
* task is current or it can't be running, otherwise we can race
* with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
* PTRACE_KILL is not safe.
*/
local_irq_disable();
debugctl = get_debugctlmsr();

Fájl megtekintése

@@ -21,37 +21,23 @@
/*
* Align a virtual address to avoid aliasing in the I$ on AMD F15h.
*
* @flags denotes the allocation direction - bottomup or topdown -
* or vDSO; see call sites below.
*/
unsigned long align_addr(unsigned long addr, struct file *filp,
enum align_flags flags)
static unsigned long get_align_mask(void)
{
unsigned long tmp_addr;
/* handle 32- and 64-bit case with a single conditional */
if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
return addr;
return 0;
if (!(current->flags & PF_RANDOMIZE))
return addr;
return 0;
if (!((flags & ALIGN_VDSO) || filp))
return addr;
return va_align.mask;
}
tmp_addr = addr;
/*
* We need an address which is <= than the original
* one only when in topdown direction.
*/
if (!(flags & ALIGN_TOPDOWN))
tmp_addr += va_align.mask;
tmp_addr &= ~va_align.mask;
return tmp_addr;
/*
 * Round @addr up to the alignment described by get_align_mask(); with a
 * mask of 0 the address is returned unchanged.
 */
unsigned long align_vdso_addr(unsigned long addr)
{
	unsigned long mask = get_align_mask();

	addr += mask;
	return addr & ~mask;
}
static int __init control_va_addr_alignment(char *str)
@@ -126,7 +112,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long start_addr;
struct vm_unmapped_area_info info;
unsigned long begin, end;
if (flags & MAP_FIXED)
@@ -144,50 +130,16 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
(!vma || addr + len <= vma->vm_start))
return addr;
}
if (((flags & MAP_32BIT) || test_thread_flag(TIF_ADDR32))
&& len <= mm->cached_hole_size) {
mm->cached_hole_size = 0;
mm->free_area_cache = begin;
}
addr = mm->free_area_cache;
if (addr < begin)
addr = begin;
start_addr = addr;
full_search:
addr = align_addr(addr, filp, 0);
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (end - len < addr) {
/*
* Start a new search - just in case we missed
* some holes.
*/
if (start_addr != begin) {
start_addr = addr = begin;
mm->cached_hole_size = 0;
goto full_search;
}
return -ENOMEM;
}
if (!vma || addr + len <= vma->vm_start) {
/*
* Remember the place where we stopped the search:
*/
mm->free_area_cache = addr + len;
return addr;
}
if (addr + mm->cached_hole_size < vma->vm_start)
mm->cached_hole_size = vma->vm_start - addr;
addr = vma->vm_end;
addr = align_addr(addr, filp, 0);
}
info.flags = 0;
info.length = len;
info.low_limit = begin;
info.high_limit = end;
info.align_mask = filp ? get_align_mask() : 0;
info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len, const unsigned long pgoff,
@@ -195,7 +147,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0, start_addr;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
/* requested length too big for entire address space */
if (len > TASK_SIZE)
@@ -217,51 +170,16 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
return addr;
}
/* check if free_area_cache is useful for us */
if (len <= mm->cached_hole_size) {
mm->cached_hole_size = 0;
mm->free_area_cache = mm->mmap_base;
}
try_again:
/* either no address requested or can't fit in requested address hole */
start_addr = addr = mm->free_area_cache;
if (addr < len)
goto fail;
addr -= len;
do {
addr = align_addr(addr, filp, ALIGN_TOPDOWN);
/*
* Lookup failure means no vma is above this address,
* else if new region fits below vma->vm_start,
* return with success:
*/
vma = find_vma(mm, addr);
if (!vma || addr+len <= vma->vm_start)
/* remember the address as a hint for next time */
return mm->free_area_cache = addr;
/* remember the largest hole we saw so far */
if (addr + mm->cached_hole_size < vma->vm_start)
mm->cached_hole_size = vma->vm_start - addr;
/* try just below the current vma->vm_start */
addr = vma->vm_start-len;
} while (len < vma->vm_start);
fail:
/*
* if hint left us with no space for the requested
* mapping then try again:
*/
if (start_addr != mm->mmap_base) {
mm->free_area_cache = mm->mmap_base;
mm->cached_hole_size = 0;
goto try_again;
}
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = mm->mmap_base;
info.align_mask = filp ? get_align_mask() : 0;
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
if (!(addr & ~PAGE_MASK))
return addr;
VM_BUG_ON(addr != -ENOMEM);
bottomup:
/*
@@ -270,14 +188,5 @@ bottomup:
* can happen with large stack limits and large mmap()
* allocations.
*/
mm->cached_hole_size = ~0UL;
mm->free_area_cache = TASK_UNMAPPED_BASE;
addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
/*
* Restore the topdown base:
*/
mm->free_area_cache = mm->mmap_base;
mm->cached_hole_size = ~0UL;
return addr;
return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
}

Fájl megtekintése

@@ -30,23 +30,110 @@
#include <linux/mmzone.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/irq.h>
#include <asm/cpu.h>
static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
#ifdef CONFIG_HOTPLUG_CPU
/*
 * cpu0_hotpluggable: non-zero when CPU0 should be treated as hotpluggable.
 *
 * Without CONFIG_BOOTPARAM_HOTPLUG_CPU0 the flag starts out clear and is
 * only set when "cpu0_hotplug" is given on the kernel command line; with
 * that option the flag is set from the start and no parameter is needed.
 */
#ifndef CONFIG_BOOTPARAM_HOTPLUG_CPU0
static int cpu0_hotpluggable;

/* "cpu0_hotplug" boot parameter handler: switch the flag on. */
static int __init enable_cpu0_hotplug(char *str)
{
	cpu0_hotpluggable = 1;

	return 1;
}
__setup("cpu0_hotplug", enable_cpu0_hotplug);
#else
static int cpu0_hotpluggable = 1;
#endif /* !CONFIG_BOOTPARAM_HOTPLUG_CPU0 */
#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
/*
 * _debug_hotplug_cpu() - offline or online a CPU for hotplug debugging.
 * @cpu:    number of the CPU to act on.
 * @action: 0 to take the CPU down, 1 to bring it up.
 *
 * This is only called for debugging the CPU offline/online feature. It
 * allows a CPU to be offlined as early as possible so that userspace boots
 * up without it; the user can online the CPU again after boot.
 *
 * Returns -EINVAL when the CPU is not hotpluggable or @action is neither
 * 0 nor 1; otherwise returns the result of cpu_down() or cpu_up().
 */
int __ref _debug_hotplug_cpu(int cpu, int action)
{
	struct device *dev = get_cpu_device(cpu);
	int ret;

	if (!cpu_is_hotpluggable(cpu))
		return -EINVAL;

	/* Hold the hotplug driver lock across the state change and uevent. */
	cpu_hotplug_driver_lock();

	switch (action) {
	case 0:
		ret = cpu_down(cpu);
		if (!ret) {
			/* @cpu is a signed int, so it is printed with %d. */
			pr_info("CPU %d is now offline\n", cpu);
			kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
		} else {
			pr_debug("Can't offline CPU%d.\n", cpu);
		}
		break;
	case 1:
		ret = cpu_up(cpu);
		if (!ret)
			kobject_uevent(&dev->kobj, KOBJ_ONLINE);
		else
			pr_debug("Can't online CPU%d.\n", cpu);
		break;
	default:
		/* Unknown action: nothing was changed. */
		ret = -EINVAL;
	}

	cpu_hotplug_driver_unlock();

	return ret;
}
/*
 * Initcall built under CONFIG_DEBUG_HOTPLUG_CPU0: requests that CPU 0 be
 * taken offline. The result of the request is not checked and the initcall
 * always reports success.
 */
static int __init debug_hotplug_cpu(void)
{
	const int cpu = 0;		/* CPU to act on */
	const int action_offline = 0;	/* action 0 == cpu_down() */

	_debug_hotplug_cpu(cpu, action_offline);

	return 0;
}
late_initcall_sync(debug_hotplug_cpu);
#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */
int __ref arch_register_cpu(int num)
{
struct cpuinfo_x86 *c = &cpu_data(num);
/*
* CPU0 cannot be offlined due to several
* restrictions and assumptions in kernel. This basically
* doesn't add a control file, one cannot attempt to offline
* BSP.
*
* Also certain PCI quirks require not to enable hotplug control
* for all CPU's.
* Currently CPU0 is only hotpluggable on Intel platforms. Other
* vendors can add hotplug support later.
*/
if (num)
if (c->x86_vendor != X86_VENDOR_INTEL)
cpu0_hotpluggable = 0;
/*
* Two known BSP/CPU0 dependencies: Resume from suspend/hibernate
* depends on BSP. PIC interrupts depend on BSP.
*
* If the BSP dependencies are under control, one can tell kernel to
* enable BSP hotplug. This basically adds a control file and
* one can attempt to offline BSP.
*/
if (num == 0 && cpu0_hotpluggable) {
unsigned int irq;
/*
* We won't take down the boot processor on i386 if some
* interrupts only are able to be serviced by the BSP in PIC.
*/
for_each_active_irq(irq) {
if (!IO_APIC_IRQ(irq) && irq_has_action(irq)) {
cpu0_hotpluggable = 0;
break;
}
}
}
if (num || cpu0_hotpluggable)
per_cpu(cpu_devices, num).cpu.hotpluggable = 1;
return register_cpu(&per_cpu(cpu_devices, num).cpu, num);

Fájl megtekintése

@@ -0,0 +1,21 @@
/*
* X86 trace clocks
*/
#include <asm/trace_clock.h>
#include <asm/barrier.h>
#include <asm/msr.h>
/*
 * trace_clock_x86_tsc(): a trace clock that is simply the cycle counter.
 *
 * The returned value is a cycle count, so unlike the other trace clocks
 * it is not expressed in nanoseconds.
 */
u64 notrace trace_clock_x86_tsc(void)
{
	u64 cycles;

	/* rdtsc_barrier() is issued ahead of the counter read. */
	rdtsc_barrier();
	rdtscll(cycles);

	return cycles;
}

Fájl megtekintése

@@ -55,7 +55,7 @@
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mce.h>
#include <asm/rcu.h>
#include <asm/context_tracking.h>
#include <asm/mach_traps.h>
@@ -69,9 +69,6 @@
asmlinkage int system_call(void);
/* Do we ignore FPU interrupts ? */
char ignore_fpu_irq;
/*
* The IDT has to be page-aligned to simplify the Pentium
* F0 0F bug workaround.
@@ -564,9 +561,6 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_X86_32
ignore_fpu_irq = 1;
#endif
exception_enter(regs);
math_error(regs, error_code, X86_TRAP_MF);
exception_exit(regs);

Fájl megtekintése

@@ -77,6 +77,12 @@ unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif
/*
 * native_read_tsc(): exported wrapper that returns whatever
 * __native_read_tsc() reports.
 */
unsigned long long native_read_tsc(void)
{
	unsigned long long tsc = __native_read_tsc();

	return tsc;
}
EXPORT_SYMBOL(native_read_tsc);
int check_tsc_unstable(void)
{
return tsc_unstable;

Fájl megtekintése

@@ -478,6 +478,11 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
regs->ip = current->utask->xol_vaddr;
pre_xol_rip_insn(auprobe, regs, autask);
autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
regs->flags |= X86_EFLAGS_TF;
if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
set_task_blockstep(current, false);
return 0;
}
@@ -603,6 +608,16 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
if (auprobe->fixups & UPROBE_FIX_CALL)
result = adjust_ret_addr(regs->sp, correction);
/*
* arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
* so we can get an extra SIGTRAP if we do not clear TF. We need
* to examine the opcode to make it right.
*/
if (utask->autask.saved_tf)
send_sig(SIGTRAP, current, 0);
else if (!(auprobe->fixups & UPROBE_FIX_SETF))
regs->flags &= ~X86_EFLAGS_TF;
return result;
}
@@ -647,6 +662,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
current->thread.trap_nr = utask->autask.saved_trap_nr;
handle_riprel_post_xol(auprobe, regs, NULL);
instruction_pointer_set(regs, utask->vaddr);
/* clear TF if it was set by us in arch_uprobe_pre_xol() */
if (!utask->autask.saved_tf)
regs->flags &= ~X86_EFLAGS_TF;
}
/*
@@ -676,38 +695,3 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
send_sig(SIGTRAP, current, 0);
return ret;
}
/*
 * arch_uprobe_enable_step(): turn on single-stepping for the current task.
 *
 * Records in the task's arch_uprobe_task whether X86_EFLAGS_TF was already
 * set, then sets TF. If the task has TIF_BLOCKSTEP, block-stepping is
 * switched off.
 */
void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
{
	struct task_struct *tsk = current;
	struct pt_regs *regs = task_pt_regs(tsk);
	struct arch_uprobe_task *state = &tsk->utask->autask;

	/* Remember the pre-existing TF state before forcing TF on. */
	state->saved_tf = (regs->flags & X86_EFLAGS_TF) != 0;
	regs->flags |= X86_EFLAGS_TF;

	if (!test_tsk_thread_flag(tsk, TIF_BLOCKSTEP))
		return;

	set_task_blockstep(tsk, false);
}
/*
 * arch_uprobe_disable_step(): undo the single-step setup for the current
 * task.
 *
 * The TIF_BLOCKSTEP state was not saved when stepping was enabled, so an
 * extra SIGTRAP can show up unless TF is cleared; the probed instruction's
 * fixups have to be examined to get this right.
 */
void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
{
	struct task_struct *tsk = current;
	struct pt_regs *regs = task_pt_regs(tsk);
	struct arch_uprobe_task *state = &tsk->utask->autask;

	if (unlikely(tsk->utask->state == UTASK_SSTEP_TRAPPED)) {
		/* Trapped step: drop TF only if it was not set beforehand. */
		if (!state->saved_tf)
			regs->flags &= ~X86_EFLAGS_TF;
		return;
	}

	if (state->saved_tf) {
		/* TF was already set beforehand: deliver SIGTRAP to the task. */
		send_sig(SIGTRAP, tsk, 0);
		return;
	}

	/* Leave TF alone when the fixups carry UPROBE_FIX_SETF. */
	if (!(auprobe->fixups & UPROBE_FIX_SETF))
		regs->flags &= ~X86_EFLAGS_TF;
}

Fájl megtekintése

@@ -182,7 +182,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
if (pud_none_or_clear_bad(pud))
goto out;
pmd = pmd_offset(pud, 0xA0000);
split_huge_page_pmd(mm, pmd);
split_huge_page_pmd_mm(mm, 0xA0000, pmd);
if (pmd_none_or_clear_bad(pmd))
goto out;
pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);

Fájl megtekintése

@@ -145,19 +145,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
return nr;
}
#ifdef CONFIG_SECCOMP
/*
 * vsyscall_seccomp(): run the seccomp check for an emulated vsyscall.
 *
 * Returns 0 straight away when seccomp_mode() reports no mode for @tsk.
 * Otherwise @syscall_nr is stored in both orig_ax and ax of the task's
 * saved registers and the result of __secure_computing() is returned.
 */
static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
{
	struct pt_regs *regs;

	if (!seccomp_mode(&tsk->seccomp))
		return 0;

	regs = task_pt_regs(tsk);
	regs->orig_ax = syscall_nr;
	regs->ax = syscall_nr;

	return __secure_computing(syscall_nr);
}
#else
/* Without CONFIG_SECCOMP the check always evaluates to 0. */
#define vsyscall_seccomp(_tsk, _nr) 0
#endif
static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
/*
@@ -190,10 +177,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr;
int vsyscall_nr, syscall_nr, tmp;
int prev_sig_on_uaccess_error;
long ret;
int skip;
/*
* No point in checking CS -- the only way to get here is a user mode
@@ -225,6 +211,64 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
}
tsk = current;
/*
* Check for access_ok violations and find the syscall nr.
*
* NULL is a valid user pointer (in the access_ok sense) on 32-bit and
* 64-bit, so we don't need to special-case it here. For all the
* vsyscalls, NULL means "don't write anything" not "write it at
* address 0".
*/
switch (vsyscall_nr) {
case 0:
if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
!write_ok_or_segv(regs->si, sizeof(struct timezone))) {
ret = -EFAULT;
goto check_fault;
}
syscall_nr = __NR_gettimeofday;
break;
case 1:
if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
ret = -EFAULT;
goto check_fault;
}
syscall_nr = __NR_time;
break;
case 2:
if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
!write_ok_or_segv(regs->si, sizeof(unsigned))) {
ret = -EFAULT;
goto check_fault;
}
syscall_nr = __NR_getcpu;
break;
}
/*
* Handle seccomp. regs->ip must be the original value.
* See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt.
*
* We could optimize the seccomp disabled case, but performance
* here doesn't matter.
*/
regs->orig_ax = syscall_nr;
regs->ax = -ENOSYS;
tmp = secure_computing(syscall_nr);
if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
warn_bad_vsyscall(KERN_DEBUG, regs,
"seccomp tried to change syscall nr or ip");
do_exit(SIGSYS);
}
if (tmp)
goto do_ret; /* skip requested */
/*
* With a real vsyscall, page faults cause SIGSEGV. We want to
* preserve that behavior to make writing exploits harder.
@@ -232,49 +276,19 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
current_thread_info()->sig_on_uaccess_error = 1;
/*
* NULL is a valid user pointer (in the access_ok sense) on 32-bit and
* 64-bit, so we don't need to special-case it here. For all the
* vsyscalls, NULL means "don't write anything" not "write it at
* address 0".
*/
ret = -EFAULT;
skip = 0;
switch (vsyscall_nr) {
case 0:
skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
if (skip)
break;
if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
!write_ok_or_segv(regs->si, sizeof(struct timezone)))
break;
ret = sys_gettimeofday(
(struct timeval __user *)regs->di,
(struct timezone __user *)regs->si);
break;
case 1:
skip = vsyscall_seccomp(tsk, __NR_time);
if (skip)
break;
if (!write_ok_or_segv(regs->di, sizeof(time_t)))
break;
ret = sys_time((time_t __user *)regs->di);
break;
case 2:
skip = vsyscall_seccomp(tsk, __NR_getcpu);
if (skip)
break;
if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
!write_ok_or_segv(regs->si, sizeof(unsigned)))
break;
ret = sys_getcpu((unsigned __user *)regs->di,
(unsigned __user *)regs->si,
NULL);
@@ -283,12 +297,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
if (skip) {
if ((long)regs->ax <= 0L) /* seccomp errno emulation */
goto do_ret;
goto done; /* seccomp trace/trap */
}
check_fault:
if (ret == -EFAULT) {
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
@@ -311,7 +320,6 @@ do_ret:
/* Emulate a ret instruction. */
regs->ip = caller;
regs->sp += 8;
done:
return true;
sigsegv: