Merge branch 'ras/core' into core/objtool, to pick up the new exception table format
Signed-off-by: Ingo Molnar <mingo@kernel.org>
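The "new exception table format" picked up here is the x86 change that grows each exception-table entry from two fields to three: every field is a 32-bit offset relative to its own address, and the added third field selects a per-entry handler, which is how fixup_exception() can behave differently per trap. A minimal sketch of that layout — field names follow the upstream patch, but the helper below is a simplified stand-in, not the kernel's actual accessor:

/*
 * Sketch of the extended exception table entry. Each field stores a
 * 32-bit offset relative to the field's own address, so the table
 * stays position-independent and needs no load-time relocation.
 */
struct exception_table_entry {
	int insn;	/* relative location of the faulting instruction */
	int fixup;	/* relative location of the fixup code */
	int handler;	/* relative location of the handler function */
};

/* Turn a self-relative field back into an absolute address. */
static inline unsigned long ex_field_to_addr(const int *field)
{
	return (unsigned long)field + *field;
}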
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/debugfs.h>
 #include <asm/mce.h>
+#include <asm/uaccess.h>
 
 #include "mce-internal.h"
 
@@ -29,7 +30,7 @@
  * panic situations)
  */
 
-enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
 enum ser { SER_REQUIRED = 1, NO_SER = 2 };
 enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
 
@@ -48,6 +49,7 @@ static struct severity {
 #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
 #define KERNEL		.context = IN_KERNEL
 #define USER		.context = IN_USER
+#define KERNEL_RECOV	.context = IN_KERNEL_RECOV
 #define SER		.ser = SER_REQUIRED
 #define NOSER		.ser = NO_SER
 #define EXCP		.excp = EXCP_CONTEXT
@@ -86,6 +88,10 @@ static struct severity {
 		PANIC, "In kernel and no restart IP",
 		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
 		),
+	MCESEV(
+		PANIC, "In kernel and no restart IP",
+		EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
+		),
 	MCESEV(
 		DEFERRED, "Deferred error",
 		NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
@@ -122,6 +128,11 @@ static struct severity {
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
 		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
 		),
+	MCESEV(
+		AR, "Action required: data load in error recoverable area of kernel",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+		KERNEL_RECOV
+		),
 	MCESEV(
 		AR, "Action required: data load error in a user process",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
@@ -170,6 +181,9 @@ static struct severity {
 		) /* always matches. keep at end */
 };
 
+#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
+			     (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
+
 /*
  * If mcgstatus indicated that ip/cs on the stack were
  * no good, then "m->cs" will be zero and we will have
@@ -183,7 +197,11 @@ static struct severity {
  */
 static int error_context(struct mce *m)
 {
-	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+	if ((m->cs & 3) == 3)
+		return IN_USER;
+	if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
+		return IN_KERNEL_RECOV;
+	return IN_KERNEL;
 }
 
 /*
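Net effect of the mce-severity.c hunks: a machine check that lands on a kernel instruction which both left a valid restart/error IP and has a tagged fault handler in the exception table is now classified IN_KERNEL_RECOV, and the two new severity-table rows route that context to action-required recovery instead of an unconditional panic. A standalone sketch of the classification, with the MSR state passed in as plain integers and ex_has_fault_handler() stubbed (in the kernel it searches the exception table):

#include <stdbool.h>
#include <stdio.h>

#define MCG_STATUS_RIPV (1ULL << 0)	/* restart IP on the stack is valid */
#define MCG_STATUS_EIPV (1ULL << 1)	/* error IP on the stack is valid */

enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };

/* Stub: the kernel walks the exception table for a fault-class entry. */
static bool ex_has_fault_handler(unsigned long ip)
{
	(void)ip;
	return true;
}

static bool mc_recoverable(unsigned long long mcg)
{
	return (mcg & (MCG_STATUS_RIPV | MCG_STATUS_EIPV)) ==
	       (MCG_STATUS_RIPV | MCG_STATUS_EIPV);
}

static enum context error_context(unsigned cs, unsigned long long mcg,
				  unsigned long ip)
{
	if ((cs & 3) == 3)			/* CPL 3: user mode */
		return IN_USER;
	if (mc_recoverable(mcg) && ex_has_fault_handler(ip))
		return IN_KERNEL_RECOV;
	return IN_KERNEL;
}

int main(void)
{
	unsigned long long mcg = MCG_STATUS_RIPV | MCG_STATUS_EIPV;

	printf("%d\n", error_context(0x10, mcg, 0));	/* 3: IN_KERNEL_RECOV */
	return 0;
}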
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -961,6 +961,20 @@ static void mce_clear_state(unsigned long *toclear)
 	}
 }
 
+static int do_memory_failure(struct mce *m)
+{
+	int flags = MF_ACTION_REQUIRED;
+	int ret;
+
+	pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
+	if (!(m->mcgstatus & MCG_STATUS_RIPV))
+		flags |= MF_MUST_KILL;
+	ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
+	if (ret)
+		pr_err("Memory error not recovered");
+	return ret;
+}
+
 /*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
@@ -998,8 +1012,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
 	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
 	char *msg = "Unknown";
-	u64 recover_paddr = ~0ull;
-	int flags = MF_ACTION_REQUIRED;
 	int lmce = 0;
 
 	/* If this CPU is offline, just bail out. */
@@ -1136,22 +1148,13 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	}
 
 	/*
-	 * At insane "tolerant" levels we take no action. Otherwise
-	 * we only die if we have no other choice. For less serious
-	 * issues we try to recover, or limit damage to the current
-	 * process.
+	 * If tolerant is at an insane level we drop requests to kill
+	 * processes and continue even when there is no way out.
 	 */
-	if (cfg->tolerant < 3) {
-		if (no_way_out)
-			mce_panic("Fatal machine check on current CPU", &m, msg);
-		if (worst == MCE_AR_SEVERITY) {
-			recover_paddr = m.addr;
-			if (!(m.mcgstatus & MCG_STATUS_RIPV))
-				flags |= MF_MUST_KILL;
-		} else if (kill_it) {
-			force_sig(SIGBUS, current);
-		}
-	}
+	if (cfg->tolerant == 3)
+		kill_it = 0;
+	else if (no_way_out)
+		mce_panic("Fatal machine check on current CPU", &m, msg);
 
 	if (worst > 0)
 		mce_report_event(regs);
@@ -1159,25 +1162,24 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 out:
 	sync_core();
 
-	if (recover_paddr == ~0ull)
-		goto done;
+	if (worst != MCE_AR_SEVERITY && !kill_it)
+		goto out_ist;
 
-	pr_err("Uncorrected hardware memory error in user-access at %llx",
-		recover_paddr);
-	/*
-	 * We must call memory_failure() here even if the current process is
-	 * doomed. We still need to mark the page as poisoned and alert any
-	 * other users of the page.
-	 */
-	ist_begin_non_atomic(regs);
-	local_irq_enable();
-	if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
-		pr_err("Memory error not recovered");
-		force_sig(SIGBUS, current);
+	/* Fault was in user mode and we need to take some action */
+	if ((m.cs & 3) == 3) {
+		ist_begin_non_atomic(regs);
+		local_irq_enable();
+
+		if (kill_it || do_memory_failure(&m))
+			force_sig(SIGBUS, current);
+		local_irq_disable();
+		ist_end_non_atomic();
+	} else {
+		if (!fixup_exception(regs, X86_TRAP_MC))
+			mce_panic("Failed kernel mode recovery", &m, NULL);
 	}
-	local_irq_disable();
-	ist_end_non_atomic();
-done:
+
+out_ist:
 	ist_exit(regs);
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1628,10 +1630,10 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	case X86_VENDOR_AMD: {
 		u32 ebx = cpuid_ebx(0x80000007);
 
-		mce_amd_feature_init(c);
 		mce_flags.overflow_recov = !!(ebx & BIT(0));
 		mce_flags.succor = !!(ebx & BIT(1));
 		mce_flags.smca = !!(ebx & BIT(3));
+		mce_amd_feature_init(c);
 
 		break;
 		}
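The mce.c hunks collapse the old recover_paddr bookkeeping into one decision at the handler's exit: if the worst severity was action-required (or a kill was requested), user-mode faults are fed to memory_failure() through the new do_memory_failure() helper, while kernel-mode faults are handed to fixup_exception() with X86_TRAP_MC and panic only if no fixup entry exists. A compile-and-run sketch of that exit-path shape, with every kernel primitive replaced by a stub (the constants below are illustrative stand-ins, not the kernel's values):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in types and values, just enough to make the shape compile. */
struct mce { unsigned cs; };
struct pt_regs;
enum { MCE_AR_SEVERITY = 6, X86_TRAP_MC = 18 };	/* illustrative values */

static int  do_memory_failure(struct mce *m) { (void)m; return 0; }
static bool fixup_exception(struct pt_regs *r, int nr) { (void)r; (void)nr; return true; }
static void force_sig_bus(void) { puts("SIGBUS to current"); }
static void mce_panic(const char *msg) { printf("panic: %s\n", msg); }

/* Condensed view of the reworked do_machine_check() exit path. */
static void recover(struct mce *m, struct pt_regs *regs, int worst, int kill_it)
{
	if (worst != MCE_AR_SEVERITY && !kill_it)
		return;				/* nothing left to do */

	if ((m->cs & 3) == 3) {			/* fault was in user mode */
		if (kill_it || do_memory_failure(m))
			force_sig_bus();
	} else {				/* fault was in the kernel */
		if (!fixup_exception(regs, X86_TRAP_MC))
			mce_panic("Failed kernel mode recovery");
	}
}

int main(void)
{
	struct mce m = { .cs = 0x33 };		/* user CS: low two bits == 3 */

	recover(&m, NULL, MCE_AR_SEVERITY, 1);	/* prints: SIGBUS to current */
	return 0;
}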
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -28,7 +28,7 @@
 #include <asm/msr.h>
 #include <asm/trace/irq_vectors.h>
 
-#define NR_BLOCKS         9
+#define NR_BLOCKS         5
 #define THRESHOLD_MAX     0xFFF
 #define INT_TYPE_APIC     0x00020000
 #define MASK_VALID_HI     0x80000000
@@ -49,6 +49,19 @@
 #define DEF_LVT_OFF		0x2
 #define DEF_INT_TYPE_APIC	0x2
 
+/* Scalable MCA: */
+
+/* Threshold LVT offset is at MSR0xC0000410[15:12] */
+#define SMCA_THR_LVT_OFF	0xF000
+
+/*
+ * OS is required to set the MCAX bit to acknowledge that it is now using the
+ * new MSR ranges and new registers under each bank. It also means that the OS
+ * will configure deferred errors in the new MCx_CONFIG register. If the bit is
+ * not set, uncorrectable errors will cause a system panic.
+ */
+#define SMCA_MCAX_EN_OFF	0x1
+
 static const char * const th_names[] = {
 	"load_store",
 	"insn_fetch",
@@ -84,6 +97,13 @@ struct thresh_restart {
 
 static inline bool is_shared_bank(int bank)
 {
+	/*
+	 * Scalable MCA provides for only one core to have access to the MSRs of
+	 * a shared bank.
+	 */
+	if (mce_flags.smca)
+		return false;
+
 	/* Bank 4 is for northbridge reporting and is thus shared */
 	return (bank == 4);
 }
@@ -135,6 +155,14 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
 	}
 
 	if (apic != msr) {
+		/*
+		 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
+		 * the BIOS provides the value. The original field where LVT offset
+		 * was set is reserved. Return early here:
+		 */
+		if (mce_flags.smca)
+			return 0;
+
 		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
 		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
 		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
@@ -247,14 +275,65 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
 	wrmsr(MSR_CU_DEF_ERR, low, high);
 }
 
+static int
+prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
+			int offset, u32 misc_high)
+{
+	unsigned int cpu = smp_processor_id();
+	struct threshold_block b;
+	int new;
+
+	if (!block)
+		per_cpu(bank_map, cpu) |= (1 << bank);
+
+	memset(&b, 0, sizeof(b));
+	b.cpu = cpu;
+	b.bank = bank;
+	b.block = block;
+	b.address = addr;
+	b.interrupt_capable = lvt_interrupt_supported(bank, misc_high);
+
+	if (!b.interrupt_capable)
+		goto done;
+
+	b.interrupt_enable = 1;
+
+	if (mce_flags.smca) {
+		u32 smca_low, smca_high;
+		u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+
+		if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+			smca_high |= SMCA_MCAX_EN_OFF;
+			wrmsr(smca_addr, smca_low, smca_high);
+		}
+
+		/* Gather LVT offset for thresholding: */
+		if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+			goto out;
+
+		new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+	} else {
+		new = (misc_high & MASK_LVTOFF_HI) >> 20;
+	}
+
+	offset = setup_APIC_mce_threshold(offset, new);
+
+	if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
+		mce_threshold_vector = amd_threshold_interrupt;
+
+done:
+	mce_threshold_block_init(&b, offset);
+
+out:
+	return offset;
+}
+
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
-	struct threshold_block b;
-	unsigned int cpu = smp_processor_id();
 	u32 low = 0, high = 0, address = 0;
 	unsigned int bank, block;
-	int offset = -1, new;
+	int offset = -1;
 
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -279,29 +358,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 			    (high & MASK_LOCKED_HI))
 				continue;
 
-			if (!block)
-				per_cpu(bank_map, cpu) |= (1 << bank);
-
-			memset(&b, 0, sizeof(b));
-			b.cpu = cpu;
-			b.bank = bank;
-			b.block = block;
-			b.address = address;
-			b.interrupt_capable = lvt_interrupt_supported(bank, high);
-
-			if (!b.interrupt_capable)
-				goto init;
-
-			b.interrupt_enable = 1;
-			new = (high & MASK_LVTOFF_HI) >> 20;
-			offset = setup_APIC_mce_threshold(offset, new);
-
-			if ((offset == new) &&
-			    (mce_threshold_vector != amd_threshold_interrupt))
-				mce_threshold_vector = amd_threshold_interrupt;
-
-init:
-			mce_threshold_block_init(&b, offset);
+			offset = prepare_threshold_block(bank, block, address, offset, high);
 		}
 	}
 
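Two details in the mce_amd.c hunks are worth calling out: on Scalable MCA parts the threshold LVT offset moves out of the per-block MISC register into bits [15:12] of MSR_CU_DEF_ERR (MSR 0xC0000410), which is exactly what the SMCA_THR_LVT_OFF mask plus the >> 12 shift extract, and the OS must set the MCAX enable bit in MCx_CONFIG before relying on the new register layout. A tiny demonstration of the bitfield extraction, using a made-up MSR value rather than a hardware read:

#include <stdint.h>
#include <stdio.h>

#define SMCA_THR_LVT_OFF 0xF000		/* MSR 0xC0000410 bits [15:12] */

int main(void)
{
	uint32_t smca_low = 0x3a00;	/* sample low half of MSR_CU_DEF_ERR */

	/* Same extraction as prepare_threshold_block() on SMCA CPUs: */
	int new_off = (smca_low & SMCA_THR_LVT_OFF) >> 12;

	printf("threshold LVT offset = %d\n", new_off);	/* prints 3 */
	return 0;
}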
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -987,7 +987,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
 		 */
-		if (fixup_exception(regs))
+		if (fixup_exception(regs, trapnr))
 			return 1;
 
 		/*
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -185,7 +185,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 	}
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs)) {
+		if (!fixup_exception(regs, trapnr)) {
 			tsk->thread.error_code = error_code;
 			tsk->thread.trap_nr = trapnr;
 			die(str, regs, error_code);
@@ -439,7 +439,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
 
 	tsk = current;
 	if (!user_mode(regs)) {
-		if (fixup_exception(regs))
+		if (fixup_exception(regs, X86_TRAP_GP))
 			return;
 
 		tsk->thread.error_code = error_code;
@@ -690,7 +690,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	cond_local_irq_enable(regs);
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs)) {
+		if (!fixup_exception(regs, trapnr)) {
 			task->thread.error_code = error_code;
 			task->thread.trap_nr = trapnr;
 			die(str, regs, error_code);
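The kprobes and traps.c call sites all change for one reason: fixup_exception() now receives the trap number, so the per-entry handler recorded in the new exception table can act on it (the machine-check path above, which must distinguish X86_TRAP_MC, is the motivating user). A simplified sketch of that trapnr-aware dispatch — absolute pointers instead of the kernel's self-relative offsets, and a hypothetical linear lookup in place of the kernel's sorted-table search:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct pt_regs { unsigned long ip; };

/* Simplified entry: absolute pointers for clarity only. */
struct extable_entry {
	unsigned long insn;		/* faulting instruction address */
	unsigned long fixup;		/* address to resume at */
	bool (*handler)(const struct extable_entry *e,
			struct pt_regs *regs, int trapnr);
};

/* Default handler class: take the fixup regardless of the trap. */
static bool ex_handler_default(const struct extable_entry *e,
			       struct pt_regs *regs, int trapnr)
{
	(void)trapnr;
	regs->ip = e->fixup;
	return true;
}

static bool fixup_exception(struct extable_entry *tbl, size_t n,
			    struct pt_regs *regs, int trapnr)
{
	for (size_t i = 0; i < n; i++)
		if (tbl[i].insn == regs->ip)
			return tbl[i].handler(&tbl[i], regs, trapnr);
	return false;	/* no entry: the caller will die()/panic */
}

int main(void)
{
	struct extable_entry tbl[] = {
		{ .insn = 0x1000, .fixup = 0x2000, .handler = ex_handler_default },
	};
	struct pt_regs regs = { .ip = 0x1000 };

	if (fixup_exception(tbl, 1, &regs, 13))		/* e.g. X86_TRAP_GP */
		printf("resumed at 0x%lx\n", regs.ip);	/* 0x2000 */
	return 0;
}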