Merge branch 'x86/mce' into x86/ras

Pursue a single RAS/MCE topic branch on x86.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2013-08-12 17:54:05 +02:00
11 changed files with 150 additions and 20 deletions

View File

@@ -25,15 +25,18 @@ int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void);
extern struct mce_bank *mce_banks;
extern mce_banks_t mce_banks_ce_disabled;
#ifdef CONFIG_X86_MCE_INTEL
unsigned long mce_intel_adjust_timer(unsigned long interval);
void mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu);
void cmci_disable_bank(int bank);
#else
# define mce_intel_adjust_timer mce_adjust_timer_default
static inline void mce_intel_cmci_poll(void) { }
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
static inline void cmci_disable_bank(int bank) { }
#endif
void mce_timer_kick(unsigned long interval);

View File

@@ -97,6 +97,15 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};
/*
* MCA banks controlled through firmware first for corrected errors.
* This is a global list of banks for which we won't enable CMCI and we
* won't poll. Firmware controls these banks and is responsible for
* reporting corrected errors through GHES. Uncorrected/recoverable
* errors are still notified through a machine check.
*/
mce_banks_t mce_banks_ce_disabled;
static DEFINE_PER_CPU(struct work_struct, mce_work);
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
@@ -1935,6 +1944,25 @@ static struct miscdevice mce_chrdev_device = {
&mce_chrdev_ops,
};
static void __mce_disable_bank(void *arg)
{
int bank = *((int *)arg);
__clear_bit(bank, __get_cpu_var(mce_poll_banks));
cmci_disable_bank(bank);
}
void mce_disable_bank(int bank)
{
if (bank >= mca_cfg.banks) {
pr_warn(FW_BUG
"Ignoring request to disable invalid MCA bank %d.\n",
bank);
return;
}
set_bit(bank, mce_banks_ce_disabled);
on_each_cpu(__mce_disable_bank, &bank, 1);
}
/*
* mce=off Disables machine check
* mce=no_cmci Disables CMCI

View File

@@ -203,6 +203,10 @@ static void cmci_discover(int banks)
if (test_bit(i, owned))
continue;
/* Skip banks in firmware first mode */
if (test_bit(i, mce_banks_ce_disabled))
continue;
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Already owned by someone else? */
@@ -271,6 +275,19 @@ void cmci_recheck(void)
local_irq_restore(flags);
}
/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
u64 val;
if (!test_bit(bank, __get_cpu_var(mce_banks_owned)))
return;
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
__clear_bit(bank, __get_cpu_var(mce_banks_owned));
}
/*
* Disable CMCI on this CPU for all banks it owns when it goes down.
* This allows other CPUs to claim the banks on rediscovery.
@@ -280,20 +297,12 @@ void cmci_clear(void)
unsigned long flags;
int i;
int banks;
u64 val;
if (!cmci_supported(&banks))
return;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
continue;
/* Disable CMCI */
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
for (i = 0; i < banks; i++)
__cmci_disable_bank(i);
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
@@ -327,6 +336,19 @@ void cmci_reenable(void)
cmci_discover(banks);
}
void cmci_disable_bank(int bank)
{
int banks;
unsigned long flags;
if (!cmci_supported(&banks))
return;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
__cmci_disable_bank(bank);
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static void intel_init_cmci(void)
{
int banks;