Merge branch 'perf/hw-branch-sampling' into perf/core
Merge reason: The 'perf record -b' hardware branch sampling feature is ready for upstream. Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
|
||||
PERF_FLAG_FD_OUTPUT |\
|
||||
PERF_FLAG_PID_CGROUP)
|
||||
|
||||
/*
|
||||
* branch priv levels that need permission checks
|
||||
*/
|
||||
#define PERF_SAMPLE_BRANCH_PERM_PLM \
|
||||
(PERF_SAMPLE_BRANCH_KERNEL |\
|
||||
PERF_SAMPLE_BRANCH_HV)
|
||||
|
||||
enum event_type_t {
|
||||
EVENT_FLEXIBLE = 0x1,
|
||||
EVENT_PINNED = 0x2,
|
||||
@@ -130,6 +137,7 @@ enum event_type_t {
|
||||
*/
|
||||
struct static_key_deferred perf_sched_events __read_mostly;
|
||||
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
|
||||
static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
|
||||
|
||||
static atomic_t nr_mmap_events __read_mostly;
|
||||
static atomic_t nr_comm_events __read_mostly;
|
||||
@@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
|
||||
if (is_cgroup_event(event))
|
||||
ctx->nr_cgroups++;
|
||||
|
||||
if (has_branch_stack(event))
|
||||
ctx->nr_branch_stack++;
|
||||
|
||||
list_add_rcu(&event->event_entry, &ctx->event_list);
|
||||
if (!ctx->nr_events)
|
||||
perf_pmu_rotate_start(ctx->pmu);
|
||||
@@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
|
||||
cpuctx->cgrp = NULL;
|
||||
}
|
||||
|
||||
if (has_branch_stack(event))
|
||||
ctx->nr_branch_stack--;
|
||||
|
||||
ctx->nr_events--;
|
||||
if (event->attr.inherit_stat)
|
||||
ctx->nr_stat--;
|
||||
@@ -2194,6 +2208,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
|
||||
perf_pmu_rotate_start(ctx->pmu);
|
||||
}
|
||||
|
||||
/*
 * When sampling the branch stack in system-wide mode, it may be necessary
 * to flush the stack on context switch. This happens when the branch
 * stack does not tag its entries with the pid of the current task.
 * Otherwise it becomes impossible to associate a branch entry with a
 * task. This ambiguity is more likely to appear when the branch stack
 * supports priv level filtering and the user sets it to monitor only
 * at the user level (which could be a useful measurement in system-wide
 * mode). In that case, the risk is high of having a branch stack with
 * branch from multiple tasks. Flushing may mean dropping the existing
 * entries or stashing them somewhere in the PMU specific code layer.
 *
 * This function provides the context switch callback to the lower code
 * layer. It is invoked ONLY when there is at least one system-wide context
 * with at least one active event using taken branch sampling.
 */
static void perf_branch_stack_sched_in(struct task_struct *prev,
				       struct task_struct *task)
{
	struct perf_cpu_context *cpuctx;
	struct pmu *pmu;
	unsigned long flags;

	/* no need to flush branch stack if not changing task */
	if (prev == task)
		return;

	/*
	 * Disable interrupts so this CPU's per-cpu contexts cannot change
	 * underneath us while we walk them.
	 */
	local_irq_save(flags);

	/* the pmus list is walked under RCU (see list_for_each_entry_rcu) */
	rcu_read_lock();

	list_for_each_entry_rcu(pmu, &pmus, entry) {
		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);

		/*
		 * check if the context has at least one
		 * event using PERF_SAMPLE_BRANCH_STACK
		 */
		if (cpuctx->ctx.nr_branch_stack > 0
		    && pmu->flush_branch_stack) {

			/*
			 * NOTE(review): reassigns the list cursor; safe only
			 * because cpuctx->ctx.pmu is the same pmu we are
			 * iterating on — confirm before relying on it.
			 */
			pmu = cpuctx->ctx.pmu;

			perf_ctx_lock(cpuctx, cpuctx->task_ctx);

			/* quiesce the PMU around the flush callback */
			perf_pmu_disable(pmu);

			pmu->flush_branch_stack();

			perf_pmu_enable(pmu);

			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
		}
	}

	rcu_read_unlock();

	local_irq_restore(flags);
}
|
||||
|
||||
/*
|
||||
* Called from scheduler to add the events of the current task
|
||||
* with interrupts disabled.
|
||||
@@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev,
|
||||
*/
|
||||
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
|
||||
perf_cgroup_sched_in(prev, task);
|
||||
|
||||
/* check for system-wide branch_stack events */
|
||||
if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
|
||||
perf_branch_stack_sched_in(prev, task);
|
||||
}
|
||||
|
||||
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
|
||||
@@ -2791,6 +2869,14 @@ static void free_event(struct perf_event *event)
|
||||
atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
|
||||
static_key_slow_dec_deferred(&perf_sched_events);
|
||||
}
|
||||
|
||||
if (has_branch_stack(event)) {
|
||||
static_key_slow_dec_deferred(&perf_sched_events);
|
||||
/* is system-wide event */
|
||||
if (!(event->attach_state & PERF_ATTACH_TASK))
|
||||
atomic_dec(&per_cpu(perf_branch_stack_events,
|
||||
event->cpu));
|
||||
}
|
||||
}
|
||||
|
||||
if (event->rb) {
|
||||
@@ -3907,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
if (data->br_stack) {
|
||||
size_t size;
|
||||
|
||||
size = data->br_stack->nr
|
||||
* sizeof(struct perf_branch_entry);
|
||||
|
||||
perf_output_put(handle, data->br_stack->nr);
|
||||
perf_output_copy(handle, data->br_stack->entries, size);
|
||||
} else {
|
||||
/*
|
||||
* we always store at least the value of nr
|
||||
*/
|
||||
u64 nr = 0;
|
||||
perf_output_put(handle, nr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void perf_prepare_sample(struct perf_event_header *header,
|
||||
@@ -3949,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
WARN_ON_ONCE(size & (sizeof(u64)-1));
|
||||
header->size += size;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
int size = sizeof(u64); /* nr */
|
||||
if (data->br_stack) {
|
||||
size += data->br_stack->nr
|
||||
* sizeof(struct perf_branch_entry);
|
||||
}
|
||||
header->size += size;
|
||||
}
|
||||
}
|
||||
|
||||
static void perf_event_output(struct perf_event *event,
|
||||
@@ -5010,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event)
|
||||
if (event->attr.type != PERF_TYPE_SOFTWARE)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* no branch sampling for software events
|
||||
*/
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (event_id) {
|
||||
case PERF_COUNT_SW_CPU_CLOCK:
|
||||
case PERF_COUNT_SW_TASK_CLOCK:
|
||||
@@ -5120,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event)
|
||||
if (event->attr.type != PERF_TYPE_TRACEPOINT)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* no branch sampling for tracepoint events
|
||||
*/
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
err = perf_trace_init(event);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -5345,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event)
|
||||
if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* no branch sampling for software events
|
||||
*/
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
perf_swevent_init_hrtimer(event);
|
||||
|
||||
return 0;
|
||||
@@ -5419,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event)
|
||||
if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* no branch sampling for software events
|
||||
*/
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
perf_swevent_init_hrtimer(event);
|
||||
|
||||
return 0;
|
||||
@@ -5866,6 +6003,12 @@ done:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
}
|
||||
if (has_branch_stack(event)) {
|
||||
static_key_slow_inc(&perf_sched_events.key);
|
||||
if (!(event->attach_state & PERF_ATTACH_TASK))
|
||||
atomic_inc(&per_cpu(perf_branch_stack_events,
|
||||
event->cpu));
|
||||
}
|
||||
}
|
||||
|
||||
return event;
|
||||
@@ -5935,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
|
||||
if (attr->read_format & ~(PERF_FORMAT_MAX-1))
|
||||
return -EINVAL;
|
||||
|
||||
if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
u64 mask = attr->branch_sample_type;
|
||||
|
||||
/* only using defined bits */
|
||||
if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
|
||||
return -EINVAL;
|
||||
|
||||
/* at least one branch bit must be set */
|
||||
if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
|
||||
return -EINVAL;
|
||||
|
||||
/* kernel level capture: check permissions */
|
||||
if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
|
||||
&& perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
|
||||
/* propagate priv level, when not set for branch */
|
||||
if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
|
||||
|
||||
/* exclude_kernel checked on syscall entry */
|
||||
if (!attr->exclude_kernel)
|
||||
mask |= PERF_SAMPLE_BRANCH_KERNEL;
|
||||
|
||||
if (!attr->exclude_user)
|
||||
mask |= PERF_SAMPLE_BRANCH_USER;
|
||||
|
||||
if (!attr->exclude_hv)
|
||||
mask |= PERF_SAMPLE_BRANCH_HV;
|
||||
/*
|
||||
* adjust user setting (for HW filter setup)
|
||||
*/
|
||||
attr->branch_sample_type = mask;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
|
||||
|
@@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp)
|
||||
if (bp->attr.type != PERF_TYPE_BREAKPOINT)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* no branch sampling for breakpoint events
|
||||
*/
|
||||
if (has_branch_stack(bp))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
err = register_perf_hw_breakpoint(bp);
|
||||
if (err)
|
||||
return err;
|
||||
|
Reference in New Issue
Block a user