Files
android_kernel_xiaomi_sm8450/arch/powerpc/include/asm/hw_irq.h
Athira Rajeev ef798cd035 powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC
[ Upstream commit 2c9ac51b850d84ee496b0a5d832ce66d411ae552 ]

Running perf fuzzer showed below in dmesg logs:
  "Can't find PMC that caused IRQ"

This means a PMU exception happened, but none of the PMC's (Performance
Monitor Counter) were found to be overflown. There are some corner cases
that clears the PMCs after PMI gets masked. In such cases, the perf
interrupt handler will not find the active PMC values that had caused
the overflow and thus leads to this message while replaying.

Case 1: PMU Interrupt happens during replay of other interrupts and
counter values gets cleared by PMU callbacks before replay:

During replay of interrupts like timer, __do_irq() and doorbell
exception, we conditionally enable interrupts via may_hard_irq_enable().
This could potentially create a window to generate a PMI. Since irq soft
mask is set to ALL_DISABLED, the PMI will get masked here. We could get
IPIs run before perf interrupt is replayed and the PMU events could
be deleted or stopped. This will change the PMU SPR values and resets
the counters. Snippet of ftrace log showing PMU callbacks invoked in
__do_irq():

  <idle>-0 [051] dns. 132025441306354: __do_irq <-call_do_irq
  <idle>-0 [051] dns. 132025441306430: irq_enter <-__do_irq
  <idle>-0 [051] dns. 132025441306503: irq_enter_rcu <-__do_irq
  <idle>-0 [051] dnH. 132025441306599: xive_get_irq <-__do_irq
  <<>>
  <idle>-0 [051] dnH. 132025441307770: generic_smp_call_function_single_interrupt <-smp_ipi_demux_relaxed
  <idle>-0 [051] dnH. 132025441307839: flush_smp_call_function_queue <-smp_ipi_demux_relaxed
  <idle>-0 [051] dnH. 132025441308057: _raw_spin_lock <-event_function
  <idle>-0 [051] dnH. 132025441308206: power_pmu_disable <-perf_pmu_disable
  <idle>-0 [051] dnH. 132025441308337: power_pmu_del <-event_sched_out
  <idle>-0 [051] dnH. 132025441308407: power_pmu_read <-power_pmu_del
  <idle>-0 [051] dnH. 132025441308477: read_pmc <-power_pmu_read
  <idle>-0 [051] dnH. 132025441308590: isa207_disable_pmc <-power_pmu_del
  <idle>-0 [051] dnH. 132025441308663: write_pmc <-power_pmu_del
  <idle>-0 [051] dnH. 132025441308787: power_pmu_event_idx <-perf_event_update_userpage
  <idle>-0 [051] dnH. 132025441308859: rcu_read_unlock_strict <-perf_event_update_userpage
  <idle>-0 [051] dnH. 132025441308975: power_pmu_enable <-perf_pmu_enable
  <<>>
  <idle>-0 [051] dnH. 132025441311108: irq_exit <-__do_irq
  <idle>-0 [051] dns. 132025441311319: performance_monitor_exception <-replay_soft_interrupts

Case 2: PMI's masked during local_* operations, example local_add(). If
the local_add() operation happens within a local_irq_save(), replay of
PMI will be during local_irq_restore(). Similar to case 1, this could
also create a window before replay where PMU events gets deleted or
stopped.

Fix it by updating the PMU callback function power_pmu_disable() to
check for pending perf interrupt. If there is an overflown PMC and
pending perf interrupt indicated in paca, clear the PMI bit in paca to
drop that sample. Clearing of PMI bit is done in power_pmu_disable()
since disable is invoked before any event gets deleted/stopped. With
this fix, if there are more than one event running in the PMU, there is
a chance that we clear the PMI bit for the event which is not getting
deleted/stopped. The other events may still remain active. Hence to make
sure we don't drop valid sample in such cases, another check is added in
power_pmu_enable. This checks if there is an overflown PMC found among
the active events and if so enable back the PMI bit. Two new helper
functions are introduced to clear/set the PMI, ie
clear_pmi_irq_pending() and set_pmi_irq_pending(). Helper function
pmi_irq_pending() is introduced to give a warning if there is pending
PMI bit in paca, but no PMC is overflown.

Also there are corner cases which result in performance monitor
interrupts being triggered during power_pmu_disable(). This happens
since PMXE bit is not cleared along with disabling of other MMCR0 bits
in the pmu_disable. Such PMI's could leave the PMU running and could
trigger PMI again which will set MMCR0 PMAO bit. This could lead to
spurious interrupts in some corner cases. Example, a timer after
power_pmu_del() which will re-enable interrupts and triggers a PMI again
since PMAO bit is still set. But fails to find valid overflow since PMC
was cleared in power_pmu_del(). Fix that by disabling PMXE along with
disabling of other MMCR0 bits in power_pmu_disable().

We can't just replay PMI any time. Hence this approach is preferred
rather than replaying PMI before resetting overflown PMC. Patch also
documents core-book3s on a race condition which can trigger these PMC
messages during idle path in PowerNV.

Fixes: f442d00480 ("powerpc/64s: Add support to mask perf interrupts and replay them")
Reported-by: Nageswara R Sastry <nasastry@in.ibm.com>
Suggested-by: Nicholas Piggin <npiggin@gmail.com>
Suggested-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Nageswara R Sastry <rnsastry@linux.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Make pmi_irq_pending() return bool, reflow/reword some comments]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1626846509-1350-2-git-send-email-atrajeev@linux.vnet.ibm.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
2022-01-27 10:54:05 +01:00

421 lines
10 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
*/
#ifndef _ASM_POWERPC_HW_IRQ_H
#define _ASM_POWERPC_HW_IRQ_H
#ifdef __KERNEL__
#include <linux/errno.h>
#include <linux/compiler.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
#ifdef CONFIG_PPC64
/*
* PACA flags in paca->irq_happened.
*
* This bits are set when interrupts occur while soft-disabled
* and allow a proper replay. Additionally, PACA_IRQ_HARD_DIS
* is set whenever we manually hard disable.
*/
#define PACA_IRQ_HARD_DIS 0x01
#define PACA_IRQ_DBELL 0x02
#define PACA_IRQ_EE 0x04
#define PACA_IRQ_DEC 0x08 /* Or FIT */
#define PACA_IRQ_HMI 0x10
#define PACA_IRQ_PMI 0x20
/*
* Some soft-masked interrupts must be hard masked until they are replayed
* (e.g., because the soft-masked handler does not clear the exception).
*/
#ifdef CONFIG_PPC_BOOK3S
#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI)
#else
#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE)
#endif
#endif /* CONFIG_PPC64 */
/*
* flags for paca->irq_soft_mask
*/
#define IRQS_ENABLED 0
#define IRQS_DISABLED 1 /* local_irq_disable() interrupts */
#define IRQS_PMI_DISABLED 2
#define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED)
#ifndef __ASSEMBLY__
extern void replay_system_reset(void);
extern void replay_soft_interrupts(void);
extern void timer_interrupt(struct pt_regs *);
extern void timer_broadcast_interrupt(void);
extern void performance_monitor_exception(struct pt_regs *regs);
extern void WatchdogException(struct pt_regs *regs);
extern void unknown_exception(struct pt_regs *regs);
#ifdef CONFIG_PPC64
#include <asm/paca.h>
static inline notrace unsigned long irq_soft_mask_return(void)
{
unsigned long flags;
asm volatile(
"lbz %0,%1(13)"
: "=r" (flags)
: "i" (offsetof(struct paca_struct, irq_soft_mask)));
return flags;
}
/*
* The "memory" clobber acts as both a compiler barrier
* for the critical section and as a clobber because
* we changed paca->irq_soft_mask
*/
static inline notrace void irq_soft_mask_set(unsigned long mask)
{
#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
/*
* The irq mask must always include the STD bit if any are set.
*
* and interrupts don't get replayed until the standard
* interrupt (local_irq_disable()) is unmasked.
*
* Other masks must only provide additional masking beyond
* the standard, and they are also not replayed until the
* standard interrupt becomes unmasked.
*
* This could be changed, but it will require partial
* unmasks to be replayed, among other things. For now, take
* the simple approach.
*/
WARN_ON(mask && !(mask & IRQS_DISABLED));
#endif
asm volatile(
"stb %0,%1(13)"
:
: "r" (mask),
"i" (offsetof(struct paca_struct, irq_soft_mask))
: "memory");
}
static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask)
{
unsigned long flags;
#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
WARN_ON(mask && !(mask & IRQS_DISABLED));
#endif
asm volatile(
"lbz %0,%1(13); stb %2,%1(13)"
: "=&r" (flags)
: "i" (offsetof(struct paca_struct, irq_soft_mask)),
"r" (mask)
: "memory");
return flags;
}
static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask)
{
unsigned long flags, tmp;
asm volatile(
"lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)"
: "=&r" (flags), "=r" (tmp)
: "i" (offsetof(struct paca_struct, irq_soft_mask)),
"r" (mask)
: "memory");
#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED));
#endif
return flags;
}
static inline unsigned long arch_local_save_flags(void)
{
return irq_soft_mask_return();
}
static inline void arch_local_irq_disable(void)
{
irq_soft_mask_set(IRQS_DISABLED);
}
extern void arch_local_irq_restore(unsigned long);
static inline void arch_local_irq_enable(void)
{
arch_local_irq_restore(IRQS_ENABLED);
}
static inline unsigned long arch_local_irq_save(void)
{
return irq_soft_mask_set_return(IRQS_DISABLED);
}
static inline bool arch_irqs_disabled_flags(unsigned long flags)
{
return flags & IRQS_DISABLED;
}
static inline bool arch_irqs_disabled(void)
{
return arch_irqs_disabled_flags(arch_local_save_flags());
}
static inline void set_pmi_irq_pending(void)
{
/*
* Invoked from PMU callback functions to set PMI bit in the paca.
* This has to be called with irq's disabled (via hard_irq_disable()).
*/
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
WARN_ON_ONCE(mfmsr() & MSR_EE);
get_paca()->irq_happened |= PACA_IRQ_PMI;
}
static inline void clear_pmi_irq_pending(void)
{
/*
* Invoked from PMU callback functions to clear the pending PMI bit
* in the paca.
*/
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
WARN_ON_ONCE(mfmsr() & MSR_EE);
get_paca()->irq_happened &= ~PACA_IRQ_PMI;
}
static inline bool pmi_irq_pending(void)
{
/*
* Invoked from PMU callback functions to check if there is a pending
* PMI bit in the paca.
*/
if (get_paca()->irq_happened & PACA_IRQ_PMI)
return true;
return false;
}
#ifdef CONFIG_PPC_BOOK3S
/*
* To support disabling and enabling of irq with PMI, set of
* new powerpc_local_irq_pmu_save() and powerpc_local_irq_restore()
* functions are added. These macros are implemented using generic
* linux local_irq_* code from include/linux/irqflags.h.
*/
#define raw_local_irq_pmu_save(flags) \
do { \
typecheck(unsigned long, flags); \
flags = irq_soft_mask_or_return(IRQS_DISABLED | \
IRQS_PMI_DISABLED); \
} while(0)
#define raw_local_irq_pmu_restore(flags) \
do { \
typecheck(unsigned long, flags); \
arch_local_irq_restore(flags); \
} while(0)
#ifdef CONFIG_TRACE_IRQFLAGS
#define powerpc_local_irq_pmu_save(flags) \
do { \
raw_local_irq_pmu_save(flags); \
if (!raw_irqs_disabled_flags(flags)) \
trace_hardirqs_off(); \
} while(0)
#define powerpc_local_irq_pmu_restore(flags) \
do { \
if (!raw_irqs_disabled_flags(flags)) \
trace_hardirqs_on(); \
raw_local_irq_pmu_restore(flags); \
} while(0)
#else
#define powerpc_local_irq_pmu_save(flags) \
do { \
raw_local_irq_pmu_save(flags); \
} while(0)
#define powerpc_local_irq_pmu_restore(flags) \
do { \
raw_local_irq_pmu_restore(flags); \
} while (0)
#endif /* CONFIG_TRACE_IRQFLAGS */
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_BOOK3E
#define __hard_irq_enable() wrtee(MSR_EE)
#define __hard_irq_disable() wrtee(0)
#define __hard_EE_RI_disable() wrtee(0)
#define __hard_RI_enable() do { } while (0)
#else
#define __hard_irq_enable() __mtmsrd(MSR_EE|MSR_RI, 1)
#define __hard_irq_disable() __mtmsrd(MSR_RI, 1)
#define __hard_EE_RI_disable() __mtmsrd(0, 1)
#define __hard_RI_enable() __mtmsrd(MSR_RI, 1)
#endif
#define hard_irq_disable() do { \
unsigned long flags; \
__hard_irq_disable(); \
flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \
local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \
if (!arch_irqs_disabled_flags(flags)) { \
asm ("stdx %%r1, 0, %1 ;" \
: "=m" (local_paca->saved_r1) \
: "b" (&local_paca->saved_r1)); \
trace_hardirqs_off(); \
} \
} while(0)
static inline bool __lazy_irq_pending(u8 irq_happened)
{
return !!(irq_happened & ~PACA_IRQ_HARD_DIS);
}
/*
* Check if a lazy IRQ is pending. Should be called with IRQs hard disabled.
*/
static inline bool lazy_irq_pending(void)
{
return __lazy_irq_pending(get_paca()->irq_happened);
}
/*
* Check if a lazy IRQ is pending, with no debugging checks.
* Should be called with IRQs hard disabled.
* For use in RI disabled code or other constrained situations.
*/
static inline bool lazy_irq_pending_nocheck(void)
{
return __lazy_irq_pending(local_paca->irq_happened);
}
/*
* This is called by asynchronous interrupts to conditionally
* re-enable hard interrupts after having cleared the source
* of the interrupt. They are kept disabled if there is a different
* soft-masked interrupt pending that requires hard masking.
*/
static inline void may_hard_irq_enable(void)
{
if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) {
get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
__hard_irq_enable();
}
}
static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
{
return (regs->softe & IRQS_DISABLED);
}
extern bool prep_irq_for_idle(void);
extern bool prep_irq_for_idle_irqsoff(void);
extern void irq_set_pending_from_srr1(unsigned long srr1);
#define fini_irq_for_idle_irqsoff() trace_hardirqs_off();
extern void force_external_irq_replay(void);
static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
{
regs->softe = val;
}
#else /* CONFIG_PPC64 */
static inline unsigned long arch_local_save_flags(void)
{
return mfmsr();
}
static inline void arch_local_irq_restore(unsigned long flags)
{
if (IS_ENABLED(CONFIG_BOOKE))
wrtee(flags);
else
mtmsr(flags);
}
static inline unsigned long arch_local_irq_save(void)
{
unsigned long flags = arch_local_save_flags();
if (IS_ENABLED(CONFIG_BOOKE))
wrtee(0);
else if (IS_ENABLED(CONFIG_PPC_8xx))
wrtspr(SPRN_EID);
else
mtmsr(flags & ~MSR_EE);
return flags;
}
static inline void arch_local_irq_disable(void)
{
if (IS_ENABLED(CONFIG_BOOKE))
wrtee(0);
else if (IS_ENABLED(CONFIG_PPC_8xx))
wrtspr(SPRN_EID);
else
mtmsr(mfmsr() & ~MSR_EE);
}
static inline void arch_local_irq_enable(void)
{
if (IS_ENABLED(CONFIG_BOOKE))
wrtee(MSR_EE);
else if (IS_ENABLED(CONFIG_PPC_8xx))
wrtspr(SPRN_EIE);
else
mtmsr(mfmsr() | MSR_EE);
}
static inline bool arch_irqs_disabled_flags(unsigned long flags)
{
return (flags & MSR_EE) == 0;
}
static inline bool arch_irqs_disabled(void)
{
return arch_irqs_disabled_flags(arch_local_save_flags());
}
#define hard_irq_disable() arch_local_irq_disable()
static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
{
return !(regs->msr & MSR_EE);
}
static inline void may_hard_irq_enable(void) { }
static inline void clear_pmi_irq_pending(void) { }
static inline void set_pmi_irq_pending(void) { }
static inline bool pmi_irq_pending(void) { return false; }
static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
{
}
#endif /* CONFIG_PPC64 */
#define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HW_IRQ_H */