tracing: rework sched_preempt_disable trace point implementation

The current implementation of the sched_preempt_disable trace point
fails to detect the preemption disable time inside spin_lock_bh()
and spin_unlock_bh(). This is because __local_bh_disable_ip() directly
calls __preempt_count_add(), which skips the preemption disable
tracking. Instead of relying on updates to the preempt count, it
is better to add the preemption disable tracking directly to the
preemptoff tracer. This is similar to how irq disable tracking
is done.

The current code handles the false positives coming from __schedule()
by directly resetting the time stamp. This requires an interface
from the scheduler to the preemptoff tracer. To avoid this additional
interface, this patch detects the same condition by comparing
the task pid and context switch count. If they do not match between
the time preemption is disabled and the time it is re-enabled, the
preemption disable time is not tracked, as a context switch was involved.

Due to this rework, the sched_preempt_disable trace point location is
changed to

/sys/kernel/debug/tracing/events/preemptirq/sched_preempt_disable/enable

Change-Id: I7f58d316b7c54bc7a54102bfeb678404bda010d4
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
[satyap@codeaurora.org: port to 5.4 and resolve trivial merge conflicts]
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
This commit is contained in:
Pavankumar Kondeti
2020-01-13 10:46:41 +05:30
committed by Satya Durga Srinivasu Prabhala
parent 617f2e7fe2
commit 9db84311e7
6 changed files with 99 additions and 90 deletions

View File

@@ -619,7 +619,7 @@ struct irqsoff_store {
unsigned long caddr[4];
};
DEFINE_PER_CPU(struct irqsoff_store, the_irqsoff);
static DEFINE_PER_CPU(struct irqsoff_store, the_irqsoff);
#endif /* CONFIG_PREEMPTIRQ_EVENTS */
/*
@@ -704,9 +704,57 @@ static struct tracer irqsoff_tracer __read_mostly =
#endif /* CONFIG_IRQSOFF_TRACER */
#ifdef CONFIG_PREEMPT_TRACER
#ifdef CONFIG_PREEMPTIRQ_EVENTS
/*
* Preempt-off stack tracing threshold, in nanoseconds.
* Default: 1000000 ns (1 ms).
*
* tracer_preempt_on() emits the sched_preempt_disable trace event only
* when the measured preempt-disabled time is greater than this value.
*/
unsigned int sysctl_preemptoff_tracing_threshold_ns = 1000000UL;
/*
* Snapshot of the most recent preempt-disable event on a CPU.
*
* Filled in by tracer_preempt_off() and consumed by tracer_preempt_on()
* to compute how long preemption stayed disabled and to report where it
* was disabled from.
*/
struct preempt_store {
/* sched_clock() value at preempt disable; reset to 0 at preempt enable */
u64 ts;
/* CALLER_ADDR0..CALLER_ADDR3 captured at preempt disable */
unsigned long caddr[4];
/* irqs_disabled() result sampled at preempt disable */
bool irqs_disabled;
/* current->pid at preempt disable */
int pid;
/*
* current->nvcsw + current->nivcsw at preempt disable; compared again
* at preempt enable to detect an intervening context switch
*/
unsigned long ncsw;
};
/* One snapshot per CPU, looked up via raw_smp_processor_id() */
static DEFINE_PER_CPU(struct preempt_store, the_ps);
#endif /* CONFIG_PREEMPTIRQ_EVENTS */
void tracer_preempt_on(unsigned long a0, unsigned long a1)
{
int pc = preempt_count();
#ifdef CONFIG_PREEMPTIRQ_EVENTS
struct preempt_store *ps;
u64 delta = 0;
lockdep_off();
ps = &per_cpu(the_ps, raw_smp_processor_id());
/*
* schedule() calls __schedule() with preemption disabled.
* If we entered idle and are exiting idle now, it looks as if
* preemption was disabled the whole time. Detect this by
* checking that preemption was disabled and re-enabled by the
* same task. There is a possibility that the same task is scheduled
* again after idle. To rule out this possibility, compare the
* context switch count as well.
*/
if (ps->ts && ps->pid == current->pid && (ps->ncsw ==
current->nvcsw + current->nivcsw))
delta = sched_clock() - ps->ts;
/*
* Trace preempt disable stack if preemption
* is disabled for more than the threshold.
*/
if (delta > sysctl_preemptoff_tracing_threshold_ns)
trace_sched_preempt_disable(delta, ps->irqs_disabled,
ps->caddr[0], ps->caddr[1],
ps->caddr[2], ps->caddr[3]);
ps->ts = 0;
lockdep_on();
#endif /* CONFIG_PREEMPTIRQ_EVENTS */
if (preempt_trace(pc) && !irq_trace())
stop_critical_timing(a0, a1, pc);
@@ -715,6 +763,21 @@ void tracer_preempt_on(unsigned long a0, unsigned long a1)
void tracer_preempt_off(unsigned long a0, unsigned long a1)
{
int pc = preempt_count();
#ifdef CONFIG_PREEMPTIRQ_EVENTS
struct preempt_store *ps;
lockdep_off();
ps = &per_cpu(the_ps, raw_smp_processor_id());
ps->ts = sched_clock();
ps->caddr[0] = CALLER_ADDR0;
ps->caddr[1] = CALLER_ADDR1;
ps->caddr[2] = CALLER_ADDR2;
ps->caddr[3] = CALLER_ADDR3;
ps->irqs_disabled = irqs_disabled();
ps->pid = current->pid;
ps->ncsw = current->nvcsw + current->nivcsw;
lockdep_on();
#endif /* CONFIG_PREEMPTIRQ_EVENTS */
if (preempt_trace(pc) && !irq_trace())
start_critical_timing(a0, a1, pc);