Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Ingo Molnar:
 "The main changes in this cycle are:

   - RAS tracing/events infrastructure, by Gong Chen.

   - Various generalizations of the APEI code to make it available to
     non-x86 architectures, by Tomasz Nowicki"

* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/ras: Fix build warnings in <linux/aer.h>
  acpi, apei, ghes: Factor out ioremap virtual memory for IRQ and NMI context.
  acpi, apei, ghes: Make NMI error notification to be GHES architecture extension.
  apei, mce: Factor out APEI architecture specific MCE calls.
  RAS, extlog: Adjust init flow
  trace, eMCA: Add a knob to adjust where to save event log
  trace, RAS: Add eMCA trace event interface
  RAS, debugfs: Add debugfs interface for RAS subsystem
  CPER: Adjust code flow of some functions
  x86, MCE: Robustify mcheck_init_device
  trace, AER: Move trace into unified interface
  trace, RAS: Add basic RAS trace event
  x86, MCE: Kill CPU_POST_DEAD
This commit is contained in:
Linus Torvalds
2014-08-04 17:21:59 -07:00
28 changed files with 656 additions and 250 deletions

View File

@@ -34,6 +34,9 @@
#include <linux/aer.h>
#define INDENT_SP " "
static char rcd_decode_str[CPER_REC_LEN];
/*
* CPER record ID need to be unique even after reboot, because record
* ID is used as index for ERST storage, while CPER records from
@@ -50,18 +53,19 @@ u64 cper_next_record_id(void)
}
EXPORT_SYMBOL_GPL(cper_next_record_id);
static const char *cper_severity_strs[] = {
static const char * const severity_strs[] = {
"recoverable",
"fatal",
"corrected",
"info",
};
static const char *cper_severity_str(unsigned int severity)
const char *cper_severity_str(unsigned int severity)
{
return severity < ARRAY_SIZE(cper_severity_strs) ?
cper_severity_strs[severity] : "unknown";
return severity < ARRAY_SIZE(severity_strs) ?
severity_strs[severity] : "unknown";
}
EXPORT_SYMBOL_GPL(cper_severity_str);
/*
* cper_print_bits - print strings for set bits
@@ -100,32 +104,32 @@ void cper_print_bits(const char *pfx, unsigned int bits,
printk("%s\n", buf);
}
static const char * const cper_proc_type_strs[] = {
static const char * const proc_type_strs[] = {
"IA32/X64",
"IA64",
};
static const char * const cper_proc_isa_strs[] = {
static const char * const proc_isa_strs[] = {
"IA32",
"IA64",
"X64",
};
static const char * const cper_proc_error_type_strs[] = {
static const char * const proc_error_type_strs[] = {
"cache error",
"TLB error",
"bus error",
"micro-architectural error",
};
static const char * const cper_proc_op_strs[] = {
static const char * const proc_op_strs[] = {
"unknown or generic",
"data read",
"data write",
"instruction execution",
};
static const char * const cper_proc_flag_strs[] = {
static const char * const proc_flag_strs[] = {
"restartable",
"precise IP",
"overflow",
@@ -137,26 +141,26 @@ static void cper_print_proc_generic(const char *pfx,
{
if (proc->validation_bits & CPER_PROC_VALID_TYPE)
printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
cper_proc_type_strs[proc->proc_type] : "unknown");
proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
proc_type_strs[proc->proc_type] : "unknown");
if (proc->validation_bits & CPER_PROC_VALID_ISA)
printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
cper_proc_isa_strs[proc->proc_isa] : "unknown");
proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
proc_isa_strs[proc->proc_isa] : "unknown");
if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
cper_print_bits(pfx, proc->proc_error_type,
cper_proc_error_type_strs,
ARRAY_SIZE(cper_proc_error_type_strs));
proc_error_type_strs,
ARRAY_SIZE(proc_error_type_strs));
}
if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
printk("%s""operation: %d, %s\n", pfx, proc->operation,
proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
cper_proc_op_strs[proc->operation] : "unknown");
proc->operation < ARRAY_SIZE(proc_op_strs) ?
proc_op_strs[proc->operation] : "unknown");
if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
printk("%s""flags: 0x%02x\n", pfx, proc->flags);
cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
ARRAY_SIZE(cper_proc_flag_strs));
cper_print_bits(pfx, proc->flags, proc_flag_strs,
ARRAY_SIZE(proc_flag_strs));
}
if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
printk("%s""level: %d\n", pfx, proc->level);
@@ -177,7 +181,7 @@ static void cper_print_proc_generic(const char *pfx,
printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
}
static const char *cper_mem_err_type_strs[] = {
static const char * const mem_err_type_strs[] = {
"unknown",
"no error",
"single-bit ECC",
@@ -196,8 +200,115 @@ static const char *cper_mem_err_type_strs[] = {
"physical memory map-out event",
};
const char *cper_mem_err_type_str(unsigned int etype)
{
return etype < ARRAY_SIZE(mem_err_type_strs) ?
mem_err_type_strs[etype] : "unknown";
}
EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
{
u32 len, n;
if (!msg)
return 0;
n = 0;
len = CPER_REC_LEN - 1;
if (mem->validation_bits & CPER_MEM_VALID_NODE)
n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
if (mem->validation_bits & CPER_MEM_VALID_CARD)
n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
if (mem->validation_bits & CPER_MEM_VALID_MODULE)
n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
if (mem->validation_bits & CPER_MEM_VALID_BANK)
n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
if (mem->validation_bits & CPER_MEM_VALID_ROW)
n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
n += scnprintf(msg + n, len - n, "bit_position: %d ",
mem->bit_pos);
if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
mem->requestor_id);
if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
mem->responder_id);
if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
mem->target_id);
msg[n] = '\0';
return n;
}
static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
{
u32 len, n;
const char *bank = NULL, *device = NULL;
if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
return 0;
n = 0;
len = CPER_REC_LEN - 1;
dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
if (bank && device)
n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
else
n = snprintf(msg, len,
"DIMM location: not present. DMI handle: 0x%.4x ",
mem->mem_dev_handle);
msg[n] = '\0';
return n;
}
void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
struct cper_mem_err_compact *cmem)
{
cmem->validation_bits = mem->validation_bits;
cmem->node = mem->node;
cmem->card = mem->card;
cmem->module = mem->module;
cmem->bank = mem->bank;
cmem->device = mem->device;
cmem->row = mem->row;
cmem->column = mem->column;
cmem->bit_pos = mem->bit_pos;
cmem->requestor_id = mem->requestor_id;
cmem->responder_id = mem->responder_id;
cmem->target_id = mem->target_id;
cmem->rank = mem->rank;
cmem->mem_array_handle = mem->mem_array_handle;
cmem->mem_dev_handle = mem->mem_dev_handle;
}
const char *cper_mem_err_unpack(struct trace_seq *p,
struct cper_mem_err_compact *cmem)
{
const char *ret = p->buffer + p->len;
if (cper_mem_err_location(cmem, rcd_decode_str))
trace_seq_printf(p, "%s", rcd_decode_str);
if (cper_dimm_err_location(cmem, rcd_decode_str))
trace_seq_printf(p, "%s", rcd_decode_str);
trace_seq_putc(p, '\0');
return ret;
}
static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
{
struct cper_mem_err_compact cmem;
if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
if (mem->validation_bits & CPER_MEM_VALID_PA)
@@ -206,48 +317,19 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
printk("%s""physical_address_mask: 0x%016llx\n",
pfx, mem->physical_addr_mask);
if (mem->validation_bits & CPER_MEM_VALID_NODE)
pr_debug("node: %d\n", mem->node);
if (mem->validation_bits & CPER_MEM_VALID_CARD)
pr_debug("card: %d\n", mem->card);
if (mem->validation_bits & CPER_MEM_VALID_MODULE)
pr_debug("module: %d\n", mem->module);
if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
pr_debug("rank: %d\n", mem->rank);
if (mem->validation_bits & CPER_MEM_VALID_BANK)
pr_debug("bank: %d\n", mem->bank);
if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
pr_debug("device: %d\n", mem->device);
if (mem->validation_bits & CPER_MEM_VALID_ROW)
pr_debug("row: %d\n", mem->row);
if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
pr_debug("column: %d\n", mem->column);
if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
pr_debug("bit_position: %d\n", mem->bit_pos);
if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id);
if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
pr_debug("responder_id: 0x%016llx\n", mem->responder_id);
if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
pr_debug("target_id: 0x%016llx\n", mem->target_id);
cper_mem_err_pack(mem, &cmem);
if (cper_mem_err_location(&cmem, rcd_decode_str))
printk("%s%s\n", pfx, rcd_decode_str);
if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
u8 etype = mem->error_type;
printk("%s""error_type: %d, %s\n", pfx, etype,
etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
cper_mem_err_type_strs[etype] : "unknown");
}
if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
const char *bank = NULL, *device = NULL;
dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
if (bank != NULL && device != NULL)
printk("%s""DIMM location: %s %s", pfx, bank, device);
else
printk("%s""DIMM DMI handle: 0x%.4x",
pfx, mem->mem_dev_handle);
cper_mem_err_type_str(etype));
}
if (cper_dimm_err_location(&cmem, rcd_decode_str))
printk("%s%s\n", pfx, rcd_decode_str);
}
static const char *cper_pcie_port_type_strs[] = {
static const char * const pcie_port_type_strs[] = {
"PCIe end point",
"legacy PCI end point",
"unknown",
@@ -266,8 +348,8 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
{
if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
cper_pcie_port_type_strs[pcie->port_type] : "unknown");
pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
pcie_port_type_strs[pcie->port_type] : "unknown");
if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
printk("%s""version: %d.%d\n", pfx,
pcie->version.major, pcie->version.minor);