[IA64] Proper handling of TLB errors from duplicate itr.d dropins

Jack Steiner noticed that duplicate TLB DTC entries do not cause a
linux panic.  See discussion:

http://www.gelato.unsw.edu.au/archives/linux-ia64/0307/6108.html

The current TLB recovery code is recovering from the duplicate itr.d
dropins, masking the underlying problem.  This change modifies
the MCA recovery code to look for the TLB check signature of the
duplicate TLB entry and panic in that case.

Signed-off-by: Russ Anderson (rja@sgi.com)
Signed-off-by: Tony Luck <tony.luck@intel.com>
This commit is contained in:
Russ Anderson
2006-12-14 16:01:41 -06:00
committed by Tony Luck
parent 908e0a8a26
commit 618b206f0b
3 changed files with 36 additions and 6 deletions

View File

@@ -607,6 +607,33 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
return status;
}
/*
* recover_from_tlb_check
* @peidx: pointer of index of processor error section
*
* Return value:
* 1 on Success / 0 on Failure
*/
static int
recover_from_tlb_check(peidx_table_t *peidx)
{
sal_log_mod_error_info_t *smei;
pal_tlb_check_info_t *ptci;
smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0);
ptci = (pal_tlb_check_info_t *)&(smei->check_info);
/*
* Look for signature of a duplicate TLB DTC entry, which is
* a SW bug and always fatal.
*/
if (ptci->op == PAL_TLB_CHECK_OP_PURGE
&& !(ptci->itr || ptci->dtc || ptci->itc))
return fatal_mca("Duplicate TLB entry");
return mca_recovered("TLB check recovered");
}
/**
* recover_from_processor_error
* @platform: whether there are some platform error section or not
@@ -651,6 +678,12 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
if (psp->us || psp->ci == 0)
return fatal_mca("error not contained");
/*
* Look for recoverable TLB check
*/
if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
return recover_from_tlb_check(peidx);
/*
* The cache check and bus check bits have four possible states
* cc bc