Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:
 "This time around we have in store:

   - Disable MC4_MISC thresholding banks on all AMD family 0x15 models
     (Shirish S)

   - AMD MCE error descriptions update and error decode improvements
     (Yazen Ghannam)

   - The usual smaller conversions and fixes"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Improve error message when kernel cannot recover, p2
  EDAC/mce_amd: Decode MCA_STATUS in bit definition order
  EDAC/mce_amd: Decode MCA_STATUS[Scrub] bit
  EDAC, mce_amd: Print ExtErrorCode and description on a single line
  EDAC, mce_amd: Match error descriptions to latest documentation
  x86/MCE/AMD, EDAC/mce_amd: Add new error descriptions for some SMCA bank types
  x86/MCE/AMD, EDAC/mce_amd: Add new McaTypes for CS, PSP, and SMU units
  x86/MCE/AMD, EDAC/mce_amd: Add new MP5, NBIO, and PCIE SMCA bank types
  RAS: Add a MAINTAINERS entry
  RAS: Use consistent types for UUIDs
  x86/MCE/AMD: Carve out the MC4_MISC thresholding quirk
  x86/MCE/AMD: Turn off MC4_MISC thresholding on all family 0x15 models
  x86/MCE: Switch to use the new generic UUID API
This commit is contained in:
Linus Torvalds
2019-03-08 09:11:39 -08:00
當前提交 e13284da94
共有 9 個文件被更改,包括 287 次插入148 次删除

查看文件

@@ -88,11 +88,17 @@ static struct smca_bank_name smca_names[] = {
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
[SMCA_CS] = { "coherent_slave", "Coherent Slave" },
[SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
[SMCA_UMC] = { "umc", "Unified Memory Controller" },
[SMCA_PB] = { "param_block", "Parameter Block" },
[SMCA_PSP] = { "psp", "Platform Security Processor" },
[SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
[SMCA_SMU] = { "smu", "System Management Unit" },
[SMCA_SMU_V2] = { "smu", "System Management Unit" },
[SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
[SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
[SMCA_PCIE] = { "pcie", "PCI Express Unit" },
};
static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
@@ -138,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
/* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF },
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
/* Data Fabric MCA types */
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF },
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
/* Unified Memory Controller MCA type */
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F },
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
/* Parameter Block MCA type */
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
/* Platform Security Processor MCA type */
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
{ SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
/* System Management Unit MCA type */
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
{ SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
/* Microprocessor 5 Unit MCA type */
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
/* Northbridge IO Unit MCA type */
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
/* PCI Express Unit MCA type */
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
};
struct smca_bank smca_banks[MAX_NR_BANKS];
@@ -545,6 +563,40 @@ out:
return offset;
}
/*
* Turn off MC4_MISC thresholding banks on all family 0x15 models since
* they're not supported there.
*/
void disable_err_thresholding(struct cpuinfo_x86 *c)
{
int i;
u64 hwcr;
bool need_toggle;
u32 msrs[] = {
0x00000413, /* MC4_MISC0 */
0xc0000408, /* MC4_MISC1 */
};
if (c->x86 != 0x15)
return;
rdmsrl(MSR_K7_HWCR, hwcr);
/* McStatusWrEn has to be set */
need_toggle = !(hwcr & BIT(18));
if (need_toggle)
wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
/* Clear CntP bit safely */
for (i = 0; i < ARRAY_SIZE(msrs); i++)
msr_clear_bit(msrs[i], 62);
/* restore old settings */
if (need_toggle)
wrmsrl(MSR_K7_HWCR, hwcr);
}
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -552,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
unsigned int bank, block, cpu = smp_processor_id();
int offset = -1;
disable_err_thresholding(c);
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (mce_flags.smca)
smca_configure(bank, cpu);

查看文件

@@ -64,11 +64,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
#define CPER_CREATOR_MCE \
UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
0x64, 0x90, 0xb8, 0x9d)
GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
0x64, 0x90, 0xb8, 0x9d)
#define CPER_SECTION_TYPE_MCE \
UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
0x04, 0x4a, 0x38, 0xfc)
GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
0x04, 0x4a, 0x38, 0xfc)
/*
* CPER specification (in UEFI specification 2.3 appendix N) requires
@@ -135,7 +135,7 @@ retry:
goto out;
/* try to skip other type records in storage */
else if (rc != sizeof(rcd) ||
uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
!guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
goto retry;
memcpy(m, &rcd.mce, sizeof(*m));
rc = sizeof(*m);

查看文件

@@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 0x15 && c->x86_model <= 0xf)
mce_flags.overflow_recov = 1;
/*
* Turn off MC4_MISC thresholding banks on those models since
* they're not supported there.
*/
if (c->x86 == 0x15 &&
(c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
int i;
u64 hwcr;
bool need_toggle;
u32 msrs[] = {
0x00000413, /* MC4_MISC0 */
0xc0000408, /* MC4_MISC1 */
};
rdmsrl(MSR_K7_HWCR, hwcr);
/* McStatusWrEn has to be set */
need_toggle = !(hwcr & BIT(18));
if (need_toggle)
wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
/* Clear CntP bit safely */
for (i = 0; i < ARRAY_SIZE(msrs); i++)
msr_clear_bit(msrs[i], 62);
/* restore old settings */
if (need_toggle)
wrmsrl(MSR_K7_HWCR, hwcr);
}
}
if (c->x86_vendor == X86_VENDOR_INTEL) {

查看文件

@@ -165,6 +165,11 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
KERNEL
),
MCESEV(
PANIC, "Instruction fetch error in kernel",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
KERNEL
),
#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",