Merge branch 'x86/urgent' into x86/cpu, to pick up dependency

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2016-06-08 13:02:16 +02:00
9445 changed files with 486940 additions and 191795 deletions

View File

@@ -565,14 +565,17 @@ static void early_init_amd(struct cpuinfo_x86 *c)
* can safely set X86_FEATURE_EXTD_APICID unconditionally for families
* after 16h.
*/
if (cpu_has_apic && c->x86 > 0x16) {
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
} else if (cpu_has_apic && c->x86 >= 0xf) {
/* check CPU config space for extended APIC ID */
unsigned int val;
val = read_pci_config(0, 24, 0, 0x68);
if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
if (boot_cpu_has(X86_FEATURE_APIC)) {
if (c->x86 > 0x16)
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
else if (c->x86 >= 0xf) {
/* check CPU config space for extended APIC ID */
unsigned int val;
val = read_pci_config(0, 24, 0, 0x68);
if ((val >> 17 & 0x3) == 0x3)
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
}
}
#endif
@@ -628,6 +631,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
*/
msr_set_bit(MSR_K7_HWCR, 6);
#endif
set_cpu_bug(c, X86_BUG_SWAPGS_FENCE);
}
static void init_amd_gh(struct cpuinfo_x86 *c)

View File

@@ -37,6 +37,7 @@
#include <asm/mtrr.h>
#include <linux/numa.h>
#include <asm/asm.h>
#include <asm/bugs.h>
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
@@ -270,6 +271,8 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
static __init int setup_disable_smep(char *arg)
{
setup_clear_cpu_cap(X86_FEATURE_SMEP);
/* Check for things that depend on SMEP being enabled: */
check_mpx_erratum(&boot_cpu_data);
return 1;
}
__setup("nosmep", setup_disable_smep);
@@ -310,6 +313,10 @@ static bool pku_disabled;
static __always_inline void setup_pku(struct cpuinfo_x86 *c)
{
/* check the boot processor, plus compile options for PKU: */
if (!cpu_feature_enabled(X86_FEATURE_PKU))
return;
/* checks the actual processor's cpuid bits: */
if (!cpu_has(c, X86_FEATURE_PKU))
return;
if (pku_disabled)
@@ -430,7 +437,7 @@ void load_percpu_segment(int cpu)
#ifdef CONFIG_X86_32
loadsegment(fs, __KERNEL_PERCPU);
#else
loadsegment(gs, 0);
__loadsegment_simple(gs, 0);
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
#endif
load_stack_canary_segment();
@@ -717,6 +724,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
}
}
if (c->extended_cpuid_level >= 0x80000007) {
cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_8000_0007_EBX] = ebx;
c->x86_power = edx;
}
if (c->extended_cpuid_level >= 0x80000008) {
cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
@@ -729,9 +743,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_phys_bits = 36;
#endif
if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007);
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
@@ -862,30 +873,34 @@ static void detect_nopl(struct cpuinfo_x86 *c)
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
}
static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
/*
* ESPFIX is a strange bug. All real CPUs have it. Paravirt
* systems that run Linux at CPL > 0 may or may not have the
* issue, but, even if they have the issue, there's absolutely
* nothing we can do about it because we can't use the real IRET
* instruction.
* Empirically, writing zero to a segment selector on AMD does
* not clear the base, whereas writing zero to a segment
* selector on Intel does clear the base. Intel's behavior
* allows slightly faster context switches in the common case
* where GS is unused by the prev and next threads.
*
* NB: For the time being, only 32-bit kernels support
* X86_BUG_ESPFIX as such. 64-bit kernels directly choose
* whether to apply espfix using paravirt hooks. If any
* non-paravirt system ever shows up that does *not* have the
* ESPFIX issue, we can change this.
* Since neither vendor documents this anywhere that I can see,
* detect it directly instead of hardcoding the choice by
* vendor.
*
* I've designated AMD's behavior as the "bug" because it's
* counterintuitive and less friendly.
*/
#ifdef CONFIG_X86_32
#ifdef CONFIG_PARAVIRT
do {
extern void native_iret(void);
if (pv_cpu_ops.iret == native_iret)
set_cpu_bug(c, X86_BUG_ESPFIX);
} while (0);
#else
set_cpu_bug(c, X86_BUG_ESPFIX);
#endif
unsigned long old_base, tmp;
rdmsrl(MSR_FS_BASE, old_base);
wrmsrl(MSR_FS_BASE, 1);
loadsegment(fs, 0);
rdmsrl(MSR_FS_BASE, tmp);
if (tmp != 0)
set_cpu_bug(c, X86_BUG_NULL_SEG);
wrmsrl(MSR_FS_BASE, old_base);
#endif
}
@@ -921,6 +936,33 @@ static void generic_identify(struct cpuinfo_x86 *c)
get_model_name(c); /* Default name */
detect_nopl(c);
detect_null_seg_behavior(c);
/*
* ESPFIX is a strange bug. All real CPUs have it. Paravirt
* systems that run Linux at CPL > 0 may or may not have the
* issue, but, even if they have the issue, there's absolutely
* nothing we can do about it because we can't use the real IRET
* instruction.
*
* NB: For the time being, only 32-bit kernels support
* X86_BUG_ESPFIX as such. 64-bit kernels directly choose
* whether to apply espfix using paravirt hooks. If any
* non-paravirt system ever shows up that does *not* have the
* ESPFIX issue, we can change this.
*/
#ifdef CONFIG_X86_32
# ifdef CONFIG_PARAVIRT
do {
extern void native_iret(void);
if (pv_cpu_ops.iret == native_iret)
set_cpu_bug(c, X86_BUG_ESPFIX);
} while (0);
# else
set_cpu_bug(c, X86_BUG_ESPFIX);
# endif
#endif
}
static void x86_init_cache_qos(struct cpuinfo_x86 *c)
@@ -1076,12 +1118,12 @@ void enable_sep_cpu(void)
struct tss_struct *tss;
int cpu;
if (!boot_cpu_has(X86_FEATURE_SEP))
return;
cpu = get_cpu();
tss = &per_cpu(cpu_tss, cpu);
if (!boot_cpu_has(X86_FEATURE_SEP))
goto out;
/*
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
* see the big comment in struct x86_hw_tss's definition.
@@ -1096,7 +1138,6 @@ void enable_sep_cpu(void)
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
out:
put_cpu();
}
#endif
@@ -1528,7 +1569,7 @@ void cpu_init(void)
pr_info("Initializing CPU#%d\n", cpu);
if (cpu_feature_enabled(X86_FEATURE_VME) ||
cpu_has_tsc ||
boot_cpu_has(X86_FEATURE_TSC) ||
boot_cpu_has(X86_FEATURE_DE))
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);

View File

@@ -333,7 +333,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
switch (dir0_lsn) {
case 0xd: /* either a 486SLC or DLC w/o DEVID */
dir0_msn = 0;
p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)];
break;
case 0xe: /* a 486S A step */

View File

@@ -25,6 +25,41 @@
#include <asm/apic.h>
#endif
/*
* Just in case our CPU detection goes bad, or you have a weird system,
* allow a way to override the automatic disabling of MPX.
*/
static int forcempx;
static int __init forcempx_setup(char *__unused)
{
forcempx = 1;
return 1;
}
__setup("intel-skd-046-workaround=disable", forcempx_setup);
void check_mpx_erratum(struct cpuinfo_x86 *c)
{
if (forcempx)
return;
/*
* Turn off the MPX feature on CPUs where SMEP is not
* available or disabled.
*
* Works around Intel Erratum SKD046: "Branch Instructions
* May Initialize MPX Bound Registers Incorrectly".
*
* This might falsely disable MPX on systems without
* SMEP, like Atom processors without SMEP. But there
* is no such hardware known at the moment.
*/
if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) {
setup_clear_cpu_cap(X86_FEATURE_MPX);
pr_warn("x86/mpx: Disabling MPX since SMEP not present\n");
}
}
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -173,6 +208,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
if (edx & (1U << 28))
c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
}
check_mpx_erratum(c);
}
#ifdef CONFIG_X86_32
@@ -233,7 +270,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* The Quark is also family 5, but does not have the same bug.
*/
clear_cpu_bug(c, X86_BUG_F00F);
if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) {
if (c->x86 == 5 && c->x86_model < 9) {
static int f00f_workaround_enabled;
set_cpu_bug(c, X86_BUG_F00F);
@@ -280,7 +317,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* integrated APIC (see 11AP erratum in "Pentium Processor
* Specification Update").
*/
if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
(c->x86_mask < 0x6 || c->x86_mask == 0xb))
set_cpu_bug(c, X86_BUG_11AP);
@@ -335,7 +372,7 @@ static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx;
if (c->cpuid_level < 4)
if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
return 1;
/* Intel has a non-standard dependency on %ecx for this CPUID level. */

View File

@@ -26,6 +26,52 @@ static struct gen_pool *mce_evt_pool;
static LLIST_HEAD(mce_event_llist);
static char gen_pool_buf[MCE_POOLSZ];
/*
* Compare the record "t" with each of the records on list "l" to see if
* an equivalent one is present in the list.
*/
static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
{
struct mce_evt_llist *node;
struct mce *m1, *m2;
m1 = &t->mce;
llist_for_each_entry(node, &l->llnode, llnode) {
m2 = &node->mce;
if (!mce_cmp(m1, m2))
return true;
}
return false;
}
/*
* The system has panicked - we'd like to peruse the list of MCE records
* that have been queued, but not seen by anyone yet. The list is in
* reverse time order, so we need to reverse it. While doing that we can
* also drop duplicate records (these were logged because some banks are
* shared between cores or by all threads on a socket).
*/
struct llist_node *mce_gen_pool_prepare_records(void)
{
struct llist_node *head;
LLIST_HEAD(new_head);
struct mce_evt_llist *node, *t;
head = llist_del_all(&mce_event_llist);
if (!head)
return NULL;
/* squeeze out duplicates while reversing order */
llist_for_each_entry_safe(node, t, head, llnode) {
if (!is_duplicate_mce_record(node, t))
llist_add(&node->llnode, &new_head);
}
return new_head.first;
}
void mce_gen_pool_process(void)
{
struct llist_node *head;

View File

@@ -35,6 +35,7 @@ void mce_gen_pool_process(void);
bool mce_gen_pool_empty(void);
int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
@@ -81,3 +82,17 @@ static inline int apei_clear_mce(u64 record_id)
#endif
void mce_inject_log(struct mce *m);
/*
* We consider records to be equivalent if bank+status+addr+misc all match.
* This is only used when the system is going down because of a fatal error
* to avoid cluttering the console log with essentially repeated information.
* In normal processing all errors seen are logged.
*/
static inline bool mce_cmp(struct mce *m1, struct mce *m2)
{
return m1->bank != m2->bank ||
m1->status != m2->status ||
m1->addr != m2->addr ||
m1->misc != m2->misc;
}

View File

@@ -204,6 +204,33 @@ static int error_context(struct mce *m)
return IN_KERNEL;
}
static int mce_severity_amd_smca(struct mce *m, int err_ctx)
{
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
u32 low, high;
/*
* We need to look at the following bits:
* - "succor" bit (data poisoning support), and
* - TCC bit (Task Context Corrupt)
* in MCi_STATUS to determine error severity.
*/
if (!mce_flags.succor)
return MCE_PANIC_SEVERITY;
if (rdmsr_safe(addr, &low, &high))
return MCE_PANIC_SEVERITY;
/* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
if ((low & MCI_CONFIG_MCAX) &&
(m->status & MCI_STATUS_TCC) &&
(err_ctx == IN_KERNEL))
return MCE_PANIC_SEVERITY;
/* ...otherwise invoke hwpoison handler. */
return MCE_AR_SEVERITY;
}
/*
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
@@ -225,6 +252,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
* to at least kill process to prolong system operation.
*/
if (mce_flags.overflow_recov) {
if (mce_flags.smca)
return mce_severity_amd_smca(m, ctx);
/* software can try to contain */
if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
return MCE_PANIC_SEVERITY;

View File

@@ -161,7 +161,6 @@ void mce_log(struct mce *mce)
if (!mce_gen_pool_add(mce))
irq_work_queue(&mce_irq_work);
mce->finished = 0;
wmb();
for (;;) {
entry = mce_log_get_idx_check(mcelog.next);
@@ -194,7 +193,6 @@ void mce_log(struct mce *mce)
mcelog.entry[entry].finished = 1;
wmb();
mce->finished = 1;
set_bit(0, &mce_need_notify);
}
@@ -224,6 +222,53 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
static inline u32 ctl_reg(int bank)
{
return MSR_IA32_MCx_CTL(bank);
}
static inline u32 status_reg(int bank)
{
return MSR_IA32_MCx_STATUS(bank);
}
static inline u32 addr_reg(int bank)
{
return MSR_IA32_MCx_ADDR(bank);
}
static inline u32 misc_reg(int bank)
{
return MSR_IA32_MCx_MISC(bank);
}
static inline u32 smca_ctl_reg(int bank)
{
return MSR_AMD64_SMCA_MCx_CTL(bank);
}
static inline u32 smca_status_reg(int bank)
{
return MSR_AMD64_SMCA_MCx_STATUS(bank);
}
static inline u32 smca_addr_reg(int bank)
{
return MSR_AMD64_SMCA_MCx_ADDR(bank);
}
static inline u32 smca_misc_reg(int bank)
{
return MSR_AMD64_SMCA_MCx_MISC(bank);
}
struct mca_msr_regs msr_ops = {
.ctl = ctl_reg,
.status = status_reg,
.addr = addr_reg,
.misc = misc_reg
};
static void print_mce(struct mce *m)
{
int ret = 0;
@@ -290,7 +335,9 @@ static void wait_for_panic(void)
static void mce_panic(const char *msg, struct mce *final, char *exp)
{
int i, apei_err = 0;
int apei_err = 0;
struct llist_node *pending;
struct mce_evt_llist *l;
if (!fake_panic) {
/*
@@ -307,11 +354,10 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
if (atomic_inc_return(&mce_fake_panicked) > 1)
return;
}
pending = mce_gen_pool_prepare_records();
/* First print corrected ones that are still unlogged */
for (i = 0; i < MCE_LOG_LEN; i++) {
struct mce *m = &mcelog.entry[i];
if (!(m->status & MCI_STATUS_VAL))
continue;
llist_for_each_entry(l, pending, llnode) {
struct mce *m = &l->mce;
if (!(m->status & MCI_STATUS_UC)) {
print_mce(m);
if (!apei_err)
@@ -319,13 +365,11 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
}
}
/* Now print uncorrected but with the final one last */
for (i = 0; i < MCE_LOG_LEN; i++) {
struct mce *m = &mcelog.entry[i];
if (!(m->status & MCI_STATUS_VAL))
continue;
llist_for_each_entry(l, pending, llnode) {
struct mce *m = &l->mce;
if (!(m->status & MCI_STATUS_UC))
continue;
if (!final || memcmp(m, final, sizeof(struct mce))) {
if (!final || mce_cmp(m, final)) {
print_mce(m);
if (!apei_err)
apei_err = apei_write_mce(m);
@@ -356,11 +400,11 @@ static int msr_to_offset(u32 msr)
if (msr == mca_cfg.rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
if (msr == msr_ops.status(bank))
return offsetof(struct mce, status);
if (msr == MSR_IA32_MCx_ADDR(bank))
if (msr == msr_ops.addr(bank))
return offsetof(struct mce, addr);
if (msr == MSR_IA32_MCx_MISC(bank))
if (msr == msr_ops.misc(bank))
return offsetof(struct mce, misc);
if (msr == MSR_IA32_MCG_STATUS)
return offsetof(struct mce, mcgstatus);
@@ -523,9 +567,9 @@ static struct notifier_block mce_srao_nb = {
static void mce_read_aux(struct mce *m, int i)
{
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
m->misc = mce_rdmsrl(msr_ops.misc(i));
if (m->status & MCI_STATUS_ADDRV) {
m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
m->addr = mce_rdmsrl(msr_ops.addr(i));
/*
* Mask the reported address by the reported granularity.
@@ -607,7 +651,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.tsc = 0;
barrier();
m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
m.status = mce_rdmsrl(msr_ops.status(i));
if (!(m.status & MCI_STATUS_VAL))
continue;
@@ -654,7 +698,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
/*
* Clear state for this bank.
*/
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
mce_wrmsrl(msr_ops.status(i), 0);
}
/*
@@ -679,7 +723,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
char *tmp;
for (i = 0; i < mca_cfg.banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
m->status = mce_rdmsrl(msr_ops.status(i));
if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
if (quirk_no_way_out)
@@ -830,9 +874,9 @@ static int mce_start(int *no_way_out)
atomic_add(*no_way_out, &global_nwo);
/*
* global_nwo should be updated before mce_callin
* Rely on the implied barrier below, such that global_nwo
* is updated before mce_callin.
*/
smp_wmb();
order = atomic_inc_return(&mce_callin);
/*
@@ -957,7 +1001,7 @@ static void mce_clear_state(unsigned long *toclear)
for (i = 0; i < mca_cfg.banks; i++) {
if (test_bit(i, toclear))
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
mce_wrmsrl(msr_ops.status(i), 0);
}
}
@@ -994,11 +1038,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int i;
int worst = 0;
int severity;
/*
* Establish sequential order between the CPUs entering the machine
* check handler.
*/
int order;
int order = -1;
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
@@ -1012,7 +1057,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
int lmce = 0;
/*
* MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
* on Intel.
*/
int lmce = 1;
/* If this CPU is offline, just bail out. */
if (cpu_is_offline(smp_processor_id())) {
@@ -1051,19 +1101,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
kill_it = 1;
/*
* Check if this MCE is signaled to only this logical processor
* Check if this MCE is signaled to only this logical processor,
* on Intel only.
*/
if (m.mcgstatus & MCG_STATUS_LMCES)
lmce = 1;
else {
/*
* Go through all the banks in exclusion of the other CPUs.
* This way we don't report duplicated events on shared banks
* because the first one to see it will clear it.
* If this is a Local MCE, then no need to perform rendezvous.
*/
if (m.cpuvendor == X86_VENDOR_INTEL)
lmce = m.mcgstatus & MCG_STATUS_LMCES;
/*
* Go through all banks in exclusion of the other CPUs. This way we
* don't report duplicated events on shared banks because the first one
* to see it will clear it. If this is a Local MCE, then no need to
* perform rendezvous.
*/
if (!lmce)
order = mce_start(&no_way_out);
}
for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
@@ -1076,7 +1127,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
m.addr = 0;
m.bank = i;
m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
m.status = mce_rdmsrl(msr_ops.status(i));
if ((m.status & MCI_STATUS_VAL) == 0)
continue;
@@ -1420,7 +1471,6 @@ static void __mcheck_cpu_init_generic(void)
enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
u64 cap;
int i;
if (!mca_cfg.bootlog)
m_fl = MCP_DONTLOG;
@@ -1436,14 +1486,19 @@ static void __mcheck_cpu_init_generic(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
}
static void __mcheck_cpu_init_clear_banks(void)
{
int i;
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
continue;
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
wrmsrl(msr_ops.ctl(i), b->ctl);
wrmsrl(msr_ops.status(i), 0);
}
}
@@ -1495,7 +1550,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
if (c->x86 <= 17 && cfg->bootlog < 0) {
if (c->x86 < 17 && cfg->bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
* by default and leave crap in there. Don't log:
@@ -1628,11 +1683,19 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
break;
case X86_VENDOR_AMD: {
u32 ebx = cpuid_ebx(0x80000007);
mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
mce_flags.overflow_recov = !!(ebx & BIT(0));
mce_flags.succor = !!(ebx & BIT(1));
mce_flags.smca = !!(ebx & BIT(3));
/*
* Install proper ops for Scalable MCA enabled processors
*/
if (mce_flags.smca) {
msr_ops.ctl = smca_ctl_reg;
msr_ops.status = smca_status_reg;
msr_ops.addr = smca_addr_reg;
msr_ops.misc = smca_misc_reg;
}
mce_amd_feature_init(c);
break;
@@ -1717,6 +1780,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_timer();
}
@@ -2082,7 +2146,7 @@ static void mce_disable_error_reporting(void)
struct mce_bank *b = &mce_banks[i];
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
wrmsrl(msr_ops.ctl(i), 0);
}
return;
}
@@ -2121,6 +2185,7 @@ static void mce_syscore_resume(void)
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
__mcheck_cpu_init_clear_banks();
}
static struct syscore_ops mce_syscore_ops = {
@@ -2138,6 +2203,7 @@ static void mce_cpu_restart(void *data)
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
__mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_timer();
}
@@ -2413,7 +2479,7 @@ static void mce_reenable_cpu(void *h)
struct mce_bank *b = &mce_banks[i];
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
wrmsrl(msr_ops.ctl(i), b->ctl);
}
}

View File

@@ -54,14 +54,6 @@
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000
/*
* OS is required to set the MCAX bit to acknowledge that it is now using the
* new MSR ranges and new registers under each bank. It also means that the OS
* will configure deferred errors in the new MCx_CONFIG register. If the bit is
* not set, uncorrectable errors will cause a system panic.
*/
#define SMCA_MCAX_EN_OFF 0x1
static const char * const th_names[] = {
"load_store",
"insn_fetch",
@@ -333,7 +325,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
/* Fall back to method we used for older processors: */
switch (block) {
case 0:
addr = MSR_IA32_MCx_MISC(bank);
addr = msr_ops.misc(bank);
break;
case 1:
offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -351,6 +343,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
int offset, u32 misc_high)
{
unsigned int cpu = smp_processor_id();
u32 smca_low, smca_high, smca_addr;
struct threshold_block b;
int new;
@@ -369,24 +362,49 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
b.interrupt_enable = 1;
if (mce_flags.smca) {
u32 smca_low, smca_high;
u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
smca_high |= SMCA_MCAX_EN_OFF;
wrmsr(smca_addr, smca_low, smca_high);
}
/* Gather LVT offset for thresholding: */
if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
goto out;
new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
} else {
if (!mce_flags.smca) {
new = (misc_high & MASK_LVTOFF_HI) >> 20;
goto set_offset;
}
smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
/*
* OS is required to set the MCAX bit to acknowledge that it is
* now using the new MSR ranges and new registers under each
* bank. It also means that the OS will configure deferred
* errors in the new MCx_CONFIG register. If the bit is not set,
* uncorrectable errors will cause a system panic.
*
* MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
*/
smca_high |= BIT(0);
/*
* SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
* registers with the option of additionally logging to
* MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
*
* This bit is usually set by BIOS to retain the old behavior
* for OSes that don't use the new registers. Linux supports the
* new registers so let's disable that additional logging here.
*
* MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
* portion of the MSR).
*/
smca_high &= ~BIT(2);
wrmsr(smca_addr, smca_low, smca_high);
}
/* Gather LVT offset for thresholding: */
if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
goto out;
new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
set_offset:
offset = setup_APIC_mce_threshold(offset, new);
if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
@@ -430,12 +448,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}
static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
static void
__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{
u32 msr_status = msr_ops.status(bank);
u32 msr_addr = msr_ops.addr(bank);
struct mce m;
u64 status;
rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
WARN_ON_ONCE(deferred_err && threshold_err);
if (deferred_err && mce_flags.smca) {
msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
}
rdmsrl(msr_status, status);
if (!(status & MCI_STATUS_VAL))
return;
@@ -448,10 +477,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
m.misc = misc;
if (m.status & MCI_STATUS_ADDRV)
rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
rdmsrl(msr_addr, m.addr);
mce_log(&m);
wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
wrmsrl(msr_status, 0);
}
static inline void __smp_deferred_error_interrupt(void)
@@ -479,17 +509,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
u64 status;
unsigned int bank;
u32 msr_status;
u64 status;
for (bank = 0; bank < mca_cfg.banks; ++bank) {
rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
: msr_ops.status(bank);
rdmsrl(msr_status, status);
if (!(status & MCI_STATUS_VAL) ||
!(status & MCI_STATUS_DEFERRED))
continue;
__log_error(bank, false, 0);
__log_error(bank, true, false, 0);
break;
}
}
@@ -544,7 +578,7 @@ static void amd_threshold_interrupt(void)
return;
log:
__log_error(bank, true, ((u64)high << 32) | low);
__log_error(bank, false, true, ((u64)high << 32) | low);
}
/*

View File

@@ -84,7 +84,7 @@ static int cmci_supported(int *banks)
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return 0;
if (!cpu_has_apic || lapic_get_maxlvt() < 6)
if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
return 0;
rdmsrl(MSR_IA32_MCG_CAP, cap);
*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);

View File

@@ -450,7 +450,7 @@ asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
if (!cpu_has_apic)
if (!boot_cpu_has(X86_FEATURE_APIC))
return 0;
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
return 0;

View File

@@ -422,7 +422,7 @@ static void show_saved_mc(void)
data_size = get_datasize(mc_saved_header);
date = mc_saved_header->date;
pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n",
i, sig, pf, rev, total_size,
date & 0xffff,
date >> 24,

View File

@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void)
pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
}
/* PAT setup for BP. We need to go through sync steps here */
void __init mtrr_bp_pat_init(void)
{
unsigned long flags;
local_irq_save(flags);
prepare_set();
pat_init();
post_set();
local_irq_restore(flags);
}
/* Grab all of the MTRR state for this CPU into *state */
bool __init get_mtrr_state(void)
{
struct mtrr_var_range *vrs;
unsigned long flags;
unsigned lo, dummy;
unsigned int i;
@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void)
mtrr_state_set = 1;
/* PAT setup for BP. We need to go through sync steps here */
local_irq_save(flags);
prepare_set();
pat_init();
post_set();
local_irq_restore(flags);
return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
}

View File

@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void)
/* BIOS may override */
__mtrr_enabled = get_mtrr_state();
if (mtrr_enabled())
mtrr_bp_pat_init();
if (mtrr_cleanup(phys_addr)) {
changed_by_mtrr_cleanup = 1;
mtrr_if->set_all();
@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void)
}
}
if (!mtrr_enabled())
if (!mtrr_enabled()) {
pr_info("MTRR: Disabled\n");
/*
* PAT initialization relies on MTRR's rendezvous handler.
* Skip PAT init until the handler can initialize both
* features independently.
*/
pat_disable("MTRRs disabled, skipping PAT initialization too.");
}
}
void mtrr_ap_init(void)

View File

@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
bool get_mtrr_state(void);
void mtrr_bp_pat_init(void);
extern void set_mtrr_ops(const struct mtrr_ops *ops);