powerpc/perf_event: Fix oops due to perf_event_do_pending call

Anton Blanchard found that large POWER systems would occasionally
crash in the exception exit path when profiling with perf_events.
The symptom was that an interrupt would occur late in the exit path
when the MSR[RI] (recoverable interrupt) bit was clear.  Interrupts
should be hard-disabled at this point but they were enabled.  Because
the interrupt was not recoverable the system panicked.

The reason is that the exception exit path was calling
perf_event_do_pending after hard-disabling interrupts, and
perf_event_do_pending will re-enable interrupts.

The simplest and cleanest fix for this is to use the same mechanism
that 32-bit powerpc does, namely to cause a self-IPI by setting the
decrementer to 1.  This means we can remove the tests in the exception
exit path and raw_local_irq_restore.

This also makes sure that the call to perf_event_do_pending from
timer_interrupt() happens within irq_enter/irq_exit.  (Note that
calling perf_event_do_pending from timer_interrupt does not mean that
there is a possible 1/HZ latency; setting the decrementer to 1 ensures
that the timer interrupt will happen immediately, i.e. within one
timebase tick, which is a few nanoseconds or 10s of nanoseconds.)

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: stable@kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
Paul Mackerras
2010-04-13 20:46:04 +00:00
committed by Benjamin Herrenschmidt
parent cea0d767c2
commit 0fe1ac48be
5 changed files with 48 additions and 66 deletions

View File

@@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void)
}
#endif /* CONFIG_PPC_ISERIES */
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
DEFINE_PER_CPU(u8, perf_event_pending);
#ifdef CONFIG_PERF_EVENTS
void set_perf_event_pending(void)
/*
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
static inline unsigned long test_perf_event_pending(void)
{
get_cpu_var(perf_event_pending) = 1;
set_dec(1);
put_cpu_var(perf_event_pending);
unsigned long x;
asm volatile("lbz %0,%1(13)"
: "=r" (x)
: "i" (offsetof(struct paca_struct, perf_event_pending)));
return x;
}
static inline void set_perf_event_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (1),
"i" (offsetof(struct paca_struct, perf_event_pending)));
}
static inline void clear_perf_event_pending(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (0),
"i" (offsetof(struct paca_struct, perf_event_pending)));
}
#else /* 32-bit */
DEFINE_PER_CPU(u8, perf_event_pending);
#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
#define test_perf_event_pending() __get_cpu_var(perf_event_pending)
#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0
#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
#endif /* 32 vs 64 bit */
void set_perf_event_pending(void)
{
preempt_disable();
set_perf_event_pending_flag();
set_dec(1);
preempt_enable();
}
#else /* CONFIG_PERF_EVENTS */
#define test_perf_event_pending() 0
#define clear_perf_event_pending()
#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
#endif /* CONFIG_PERF_EVENTS */
/*
* For iSeries shared processors, we have to let the hypervisor
@@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs)
set_dec(DECREMENTER_MAX);
#ifdef CONFIG_PPC32
if (test_perf_event_pending()) {
clear_perf_event_pending();
perf_event_do_pending();
}
if (atomic_read(&ppc_n_lost_interrupts) != 0)
do_IRQ(regs);
#endif
@@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs)
calculate_steal_time();
if (test_perf_event_pending()) {
clear_perf_event_pending();
perf_event_do_pending();
}
#ifdef CONFIG_PPC_ISERIES
if (firmware_has_feature(FW_FEATURE_ISERIES))
get_lppaca()->int_dword.fields.decr_int = 0;