Merge tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance events updates from Ingo Molnar:
 "x86 Intel updates:

   - Add Jasper Lake support

   - Add support for TopDown metrics on Ice Lake

   - Fix Ice Lake & Tiger Lake uncore support, add Snow Ridge support

   - Add a PCI sub driver to support uncore PMUs where the PCI resources
     have been claimed already - extending the range of supported systems.

  x86 AMD updates:

   - Restore 'perf stat -a' behaviour to program the uncore PMU to count
     all CPU threads.

   - Fix setting the proper count when sampling Large Increment per
     Cycle events / 'paired' events.

   - Fix IBS Fetch sampling on F17h and some other IBS fine tuning,
     greatly reducing the number of interrupts when large sample periods
     are specified.

   - Extends Family 17h RAPL support to also work on compatible F19h
     machines.

  Core code updates:

   - Fix race in perf_mmap_close()

   - Add PERF_EV_CAP_SIBLING, to denote that sibling events should be
     closed if the leader is removed.

   - Smaller fixes and updates"

* tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
  perf/core: Fix race in the perf_mmap_close() function
  perf/x86: Fix n_metric for cancelled txn
  perf/x86: Fix n_pair for cancelled txn
  x86/events/amd/iommu: Fix sizeof mismatch
  perf/x86/intel: Check perf metrics feature for each CPU
  perf/x86/intel: Fix Ice Lake event constraint table
  perf/x86/intel/uncore: Fix the scale of the IMC free-running events
  perf/x86/intel/uncore: Fix for iio mapping on Skylake Server
  perf/x86/msr: Add Jasper Lake support
  perf/x86/intel: Add Jasper Lake support
  perf/x86/intel/uncore: Reduce the number of CBOX counters
  perf/x86/intel/uncore: Update Ice Lake uncore units
  perf/x86/intel/uncore: Split the Ice Lake and Tiger Lake MSR uncore support
  perf/x86/intel/uncore: Support PCIe3 unit on Snow Ridge
  perf/x86/intel/uncore: Generic support for the PCI sub driver
  perf/x86/intel/uncore: Factor out uncore_pci_pmu_unregister()
  perf/x86/intel/uncore: Factor out uncore_pci_pmu_register()
  perf/x86/intel/uncore: Factor out uncore_pci_find_dev_pmu()
  perf/x86/intel/uncore: Factor out uncore_pci_get_dev_die_info()
  perf/amd/uncore: Inform the user how many counters each uncore PMU has
  ...
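One of the core changes in the diff below introduces PERF_EV_CAP_SIBLING for events that are only meaningful as members of a group (used by the Ice Lake TopDown support). As a rough illustration of how a PMU driver is expected to use it -- a minimal, hypothetical sketch, not code from this merge; the example_* names and the 0x0123 condition are placeholders -- the driver tags such events, and the core code in perf_group_detach() then schedules them out and moves them into the ERROR state when their leader goes away:

    /* Hypothetical driver-side sketch; only event->event_caps and
     * PERF_EV_CAP_SIBLING are real API, the rest is illustrative. */
    #include <linux/perf_event.h>

    /* Placeholder predicate: does this event only make sense inside a group? */
    static bool example_event_needs_leader(struct perf_event *event)
    {
            return event->attr.config == 0x0123;    /* illustrative condition */
    }

    static int example_pmu_hw_config(struct perf_event *event)
    {
            /*
             * Sibling-only events get PERF_EV_CAP_SIBLING, so that
             * perf_group_detach() moves them to PERF_EVENT_STATE_ERROR
             * instead of letting them run on without their leader.
             */
            if (example_event_needs_leader(event))
                    event->event_caps |= PERF_EV_CAP_SIBLING;

            return 0;
    }

The corresponding core-side handling is in the perf_group_detach() and _perf_event_enable() hunks below.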
@@ -383,7 +383,6 @@ static DEFINE_MUTEX(perf_sched_mutex);
 static atomic_t perf_sched_count;
 
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
-static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
@@ -2134,8 +2133,24 @@ static inline struct list_head *get_event_list(struct perf_event *event)
 	return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active;
 }
 
+/*
+ * Events that have PERF_EV_CAP_SIBLING require being part of a group and
+ * cannot exist on their own, schedule them out and move them into the ERROR
+ * state. Also see _perf_event_enable(), it will not be able to recover
+ * this ERROR state.
+ */
+static inline void perf_remove_sibling_event(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+
+	event_sched_out(event, cpuctx, ctx);
+	perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+}
+
 static void perf_group_detach(struct perf_event *event)
 {
 	struct perf_event *leader = event->group_leader;
 	struct perf_event *sibling, *tmp;
+	struct perf_event_context *ctx = event->ctx;
 
@@ -2154,7 +2169,7 @@ static void perf_group_detach(struct perf_event *event)
 	/*
 	 * If this is a sibling, remove it from its group.
 	 */
-	if (event->group_leader != event) {
+	if (leader != event) {
 		list_del_init(&event->sibling_list);
 		event->group_leader->nr_siblings--;
 		goto out;
@@ -2167,6 +2182,9 @@ static void perf_group_detach(struct perf_event *event)
 	 */
 	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
 
+		if (sibling->event_caps & PERF_EV_CAP_SIBLING)
+			perf_remove_sibling_event(sibling);
+
 		sibling->group_leader = sibling;
 		list_del_init(&sibling->sibling_list);
 
@@ -2184,10 +2202,10 @@ static void perf_group_detach(struct perf_event *event)
 	}
 
 out:
-	perf_event__header_size(event->group_leader);
-
-	for_each_sibling_event(tmp, event->group_leader)
+	for_each_sibling_event(tmp, leader)
 		perf_event__header_size(tmp);
+
+	perf_event__header_size(leader);
 }
 
 static bool is_orphaned_event(struct perf_event *event)
@@ -2980,6 +2998,7 @@ static void _perf_event_enable(struct perf_event *event)
 	raw_spin_lock_irq(&ctx->lock);
 	if (event->state >= PERF_EVENT_STATE_INACTIVE ||
 	    event->state < PERF_EVENT_STATE_ERROR) {
+out:
 		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
@@ -2991,8 +3010,16 @@ static void _perf_event_enable(struct perf_event *event)
 	 * has gone back into error state, as distinct from the task having
 	 * been scheduled away before the cross-call arrived.
 	 */
-	if (event->state == PERF_EVENT_STATE_ERROR)
+	if (event->state == PERF_EVENT_STATE_ERROR) {
+		/*
+		 * Detached SIBLING events cannot leave ERROR state.
+		 */
+		if (event->event_caps & PERF_EV_CAP_SIBLING &&
+		    event->group_leader == event)
+			goto out;
+
 		event->state = PERF_EVENT_STATE_OFF;
+	}
 	raw_spin_unlock_irq(&ctx->lock);
 
 	event_function_call(event, __perf_event_enable, NULL);
@@ -3357,10 +3384,12 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 	struct perf_event_context *parent, *next_parent;
 	struct perf_cpu_context *cpuctx;
 	int do_switch = 1;
+	struct pmu *pmu;
 
 	if (likely(!ctx))
 		return;
 
+	pmu = ctx->pmu;
 	cpuctx = __get_cpu_context(ctx);
 	if (!cpuctx->task_ctx)
 		return;
@@ -3390,11 +3419,15 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		raw_spin_lock(&ctx->lock);
 		raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
 		if (context_equiv(ctx, next_ctx)) {
-			struct pmu *pmu = ctx->pmu;
 
 			WRITE_ONCE(ctx->task, next);
 			WRITE_ONCE(next_ctx->task, task);
 
+			perf_pmu_disable(pmu);
+
+			if (cpuctx->sched_cb_usage && pmu->sched_task)
+				pmu->sched_task(ctx, false);
+
 			/*
 			 * PMU specific parts of task perf context can require
 			 * additional synchronization. As an example of such
@@ -3406,6 +3439,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 			else
 				swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
 
+			perf_pmu_enable(pmu);
+
 			/*
 			 * RCU_INIT_POINTER here is safe because we've not
 			 * modified the ctx and the above modification of
@@ -3428,21 +3463,22 @@ unlock:
 
 	if (do_switch) {
 		raw_spin_lock(&ctx->lock);
+		perf_pmu_disable(pmu);
+
+		if (cpuctx->sched_cb_usage && pmu->sched_task)
+			pmu->sched_task(ctx, false);
 		task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
+
+		perf_pmu_enable(pmu);
 		raw_spin_unlock(&ctx->lock);
 	}
 }
 
-static DEFINE_PER_CPU(struct list_head, sched_cb_list);
-
 void perf_sched_cb_dec(struct pmu *pmu)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 
-	this_cpu_dec(perf_sched_cb_usages);
-
-	if (!--cpuctx->sched_cb_usage)
-		list_del(&cpuctx->sched_cb_entry);
+	--cpuctx->sched_cb_usage;
 }
 
 
@@ -3450,10 +3486,7 @@ void perf_sched_cb_inc(struct pmu *pmu)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 
-	if (!cpuctx->sched_cb_usage++)
-		list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
-
-	this_cpu_inc(perf_sched_cb_usages);
+	cpuctx->sched_cb_usage++;
 }
 
 /*
@@ -3464,30 +3497,22 @@ void perf_sched_cb_inc(struct pmu *pmu)
  * PEBS requires this to provide PID/TID information. This requires we flush
  * all queued PEBS records before we context switch to a new task.
  */
-static void perf_pmu_sched_task(struct task_struct *prev,
-				struct task_struct *next,
-				bool sched_in)
+static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in)
 {
-	struct perf_cpu_context *cpuctx;
 	struct pmu *pmu;
 
-	if (prev == next)
+	pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
+
+	if (WARN_ON_ONCE(!pmu->sched_task))
 		return;
 
-	list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
-		pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
+	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+	perf_pmu_disable(pmu);
 
-		if (WARN_ON_ONCE(!pmu->sched_task))
-			continue;
+	pmu->sched_task(cpuctx->task_ctx, sched_in);
 
-		perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-		perf_pmu_disable(pmu);
-
-		pmu->sched_task(cpuctx->task_ctx, sched_in);
-
-		perf_pmu_enable(pmu);
-		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-	}
+	perf_pmu_enable(pmu);
+	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 }
 
 static void perf_event_switch(struct task_struct *task,
@@ -3512,9 +3537,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
 	int ctxn;
 
-	if (__this_cpu_read(perf_sched_cb_usages))
-		perf_pmu_sched_task(task, next, false);
-
 	if (atomic_read(&nr_switch_events))
 		perf_event_switch(task, next, false);
 
@@ -3746,10 +3768,14 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 					struct task_struct *task)
 {
 	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu = ctx->pmu;
 
 	cpuctx = __get_cpu_context(ctx);
-	if (cpuctx->task_ctx == ctx)
+	if (cpuctx->task_ctx == ctx) {
+		if (cpuctx->sched_cb_usage)
+			__perf_pmu_sched_task(cpuctx, true);
 		return;
+	}
 
 	perf_ctx_lock(cpuctx, ctx);
 	/*
@@ -3759,7 +3785,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 	if (!ctx->nr_events)
 		goto unlock;
 
-	perf_pmu_disable(ctx->pmu);
+	perf_pmu_disable(pmu);
 	/*
 	 * We want to keep the following priority order:
 	 * cpu pinned (that don't need to move), task pinned,
@@ -3771,7 +3797,11 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 	if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
 		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
 	perf_event_sched_in(cpuctx, ctx, task);
-	perf_pmu_enable(ctx->pmu);
+
+	if (cpuctx->sched_cb_usage && pmu->sched_task)
+		pmu->sched_task(cpuctx->task_ctx, true);
+
+	perf_pmu_enable(pmu);
 
 unlock:
 	perf_ctx_unlock(cpuctx, ctx);
@@ -3814,9 +3844,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 
 	if (atomic_read(&nr_switch_events))
 		perf_event_switch(task, prev, true);
-
-	if (__this_cpu_read(perf_sched_cb_usages))
-		perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -5869,11 +5896,11 @@ static void perf_pmu_output_stop(struct perf_event *event);
 static void perf_mmap_close(struct vm_area_struct *vma)
 {
 	struct perf_event *event = vma->vm_file->private_data;
-
 	struct perf_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
 	int mmap_locked = rb->mmap_locked;
 	unsigned long size = perf_data_size(rb);
+	bool detach_rest = false;
 
 	if (event->pmu->event_unmapped)
 		event->pmu->event_unmapped(event, vma->vm_mm);
@@ -5904,7 +5931,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 		mutex_unlock(&event->mmap_mutex);
 	}
 
-	atomic_dec(&rb->mmap_count);
+	if (atomic_dec_and_test(&rb->mmap_count))
+		detach_rest = true;
 
 	if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
 		goto out_put;
@@ -5913,7 +5941,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	mutex_unlock(&event->mmap_mutex);
 
 	/* If there's still other mmap()s of this buffer, we're done. */
-	if (atomic_read(&rb->mmap_count))
+	if (!detach_rest)
 		goto out_put;
 
 	/*
@@ -12829,7 +12857,6 @@ static void __init perf_event_init_all_cpus(void)
 #ifdef CONFIG_CGROUP_PERF
 		INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
 #endif
-		INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
 	}
 }
 