powerpc/pseries: Dump the SLB contents on SLB MCE errors.

If we get a machine check exceptions due to SLB errors then dump the
current SLB contents which will be very much helpful in debugging the
root cause of SLB errors. Introduce an exclusive buffer per cpu to hold
faulty SLB entries. In real mode mce handler saves the old SLB contents
into this buffer accessible through paca and print it out later in virtual
mode.

With this patch the console will log SLB contents like below on SLB MCE
errors:

[  507.297236] SLB contents of cpu 0x1
[  507.297237] Last SLB entry inserted at slot 16
[  507.297238] 00 c000000008000000 400ea1b217000500
[  507.297239]   1T  ESID=   c00000  VSID=      ea1b217 LLP:100
[  507.297240] 01 d000000008000000 400d43642f000510
[  507.297242]   1T  ESID=   d00000  VSID=      d43642f LLP:110
[  507.297243] 11 f000000008000000 400a86c85f000500
[  507.297244]   1T  ESID=   f00000  VSID=      a86c85f LLP:100
[  507.297245] 12 00007f0008000000 4008119624000d90
[  507.297246]   1T  ESID=       7f  VSID=      8119624 LLP:110
[  507.297247] 13 0000000018000000 00092885f5150d90
[  507.297247]  256M ESID=        1  VSID=   92885f5150 LLP:110
[  507.297248] 14 0000010008000000 4009e7cb50000d90
[  507.297249]   1T  ESID=        1  VSID=      9e7cb50 LLP:110
[  507.297250] 15 d000000008000000 400d43642f000510
[  507.297251]   1T  ESID=   d00000  VSID=      d43642f LLP:110
[  507.297252] 16 d000000008000000 400d43642f000510
[  507.297253]   1T  ESID=   d00000  VSID=      d43642f LLP:110
[  507.297253] ----------------------------------
[  507.297254] SLB cache ptr value = 3
[  507.297254] Valid SLB cache entries:
[  507.297255] 00 EA[0-35]=    7f000
[  507.297256] 01 EA[0-35]=        1
[  507.297257] 02 EA[0-35]=     1000
[  507.297257] Rest of SLB cache entries:
[  507.297258] 03 EA[0-35]=    7f000
[  507.297258] 04 EA[0-35]=        1
[  507.297259] 05 EA[0-35]=     1000
[  507.297260] 06 EA[0-35]=       12
[  507.297260] 07 EA[0-35]=    7f000

Suggested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
Mahesh Salgaonkar
2018-09-11 19:57:15 +05:30
committed by Michael Ellerman
parent 8f0b80561f
commit c6d15258cd
5 changed files with 112 additions and 1 deletions

View File

@@ -612,6 +612,12 @@ static void pseries_print_mce_info(struct pt_regs *regs,
break;
}
#ifdef CONFIG_PPC_BOOK3S_64
/* Display faulty slb contents for SLB errors. */
if (error_type == MC_ERROR_TYPE_SLB)
slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
disposition == RTAS_DISP_FULLY_RECOVERED ?
"Recovered" : "Not recovered");
@@ -675,7 +681,16 @@ static int mce_handle_error(struct rtas_error_log *errp)
switch (error_type) {
case MC_ERROR_TYPE_SLB:
case MC_ERROR_TYPE_ERAT:
/* Store the old slb content someplace. */
/*
* Store the old slb content in paca before flushing.
* Print this when we go to virtual mode.
* There are chances that we may hit MCE again if there
* is a parity error on the SLB entry we trying to read
* for saving. Hence limit the slb saving to single
* level of recursion.
*/
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
flush_and_reload_slb();
disposition = RTAS_DISP_FULLY_RECOVERED;
rtas_set_disposition_recovered(errp);

View File

@@ -107,6 +107,10 @@ static void __init fwnmi_init(void)
u8 *mce_data_buf;
unsigned int i;
int nr_cpus = num_possible_cpus();
#ifdef CONFIG_PPC_BOOK3S_64
struct slb_entry *slb_ptr;
size_t size;
#endif
int ibm_nmi_register = rtas_token("ibm,nmi-register");
if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
@@ -132,6 +136,15 @@ static void __init fwnmi_init(void)
paca_ptrs[i]->mce_data_buf = mce_data_buf +
(RTAS_ERROR_LOG_MAX * i);
}
#ifdef CONFIG_PPC_BOOK3S_64
/* Allocate per cpu slb area to save old slb contents during MCE */
size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
slb_ptr = __va(memblock_alloc_base(size, sizeof(struct slb_entry),
ppc64_rma_size));
for_each_possible_cpu(i)
paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
#endif
}
static void pseries_8259_cascade(struct irq_desc *desc)