powerpc/powernv: Invoke opal_cec_reboot2() on unrecoverable machine check errors.
On non-recoverable MCE errors in kernel space, the Linux kernel panics and the system reboots. On BMC-based systems, opal-prd runs as a daemon in the host. Hence, a kernel crash may prevent opal-prd from detecting and analyzing this MCE error. This may land us in a situation where the faulty memory never gets de-configured and Linux keeps hitting the same MCE error again and again. If this happens at an early stage of kernel initialization, then Linux will keep crashing and rebooting in a loop. This patch fixes the issue by invoking the new opal_cec_reboot2() call with reboot type OPAL_REBOOT_PLATFORM_ERROR to inform the BMC/OCC about this error, so that the BMC can collect relevant data for error analysis and decide which component to de-configure before rebooting. This patch depends on the OPAL patchset posted on the skiboot mailing list at https://lists.ozlabs.org/pipermail/skiboot/2015-July/001771.html that introduces the opal_cec_reboot2() OPAL call. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:

committed by
Michael Ellerman

parent
1852ae276b
commit
e784b6499d
@@ -154,7 +154,8 @@
|
||||
#define OPAL_FLASH_WRITE 111
|
||||
#define OPAL_FLASH_ERASE 112
|
||||
#define OPAL_PRD_MSG 113
|
||||
#define OPAL_LAST 113
|
||||
#define OPAL_CEC_REBOOT2 116
|
||||
#define OPAL_LAST 116
|
||||
|
||||
/* Device tree flags */
|
||||
|
||||
@@ -857,6 +858,12 @@ enum OpalSysCooling {
|
||||
OPAL_SYSCOOL_INSF = 0x0001, /* System insufficient cooling */
|
||||
};
|
||||
|
||||
/* Argument to OPAL_CEC_REBOOT2() */
|
||||
enum {
|
||||
OPAL_REBOOT_NORMAL = 0,
|
||||
OPAL_REBOOT_PLATFORM_ERROR = 1,
|
||||
};
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* __OPAL_API_H */
|
||||
|
@@ -44,6 +44,7 @@ int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day,
|
||||
uint32_t hour_min);
|
||||
int64_t opal_cec_power_down(uint64_t request);
|
||||
int64_t opal_cec_reboot(void);
|
||||
int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag);
|
||||
int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
|
||||
int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
|
||||
int64_t opal_handle_interrupt(uint64_t isn, __be64 *outstanding_event_mask);
|
||||
|
Reference in New Issue
Block a user