Merge tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:
 "x86 Intel updates:

   - Add Jasper Lake support

   - Add support for TopDown metrics on Ice Lake

   - Fix Ice Lake & Tiger Lake uncore support, add Snow Ridge support

   - Add a PCI sub driver to support uncore PMUs where the PCI resources
     have been claimed already - extending the range of supported
     systems.

  x86 AMD updates:

   - Restore 'perf stat -a' behaviour to program the uncore PMU to count
     all CPU threads.

   - Fix setting the proper count when sampling Large Increment per
     Cycle events / 'paired' events.

   - Fix IBS Fetch sampling on F17h and some other IBS fine tuning,
     greatly reducing the number of interrupts when large sample periods
     are specified.

   - Extends Family 17h RAPL support to also work on compatible F19h
     machines.

  Core code updates:

   - Fix race in perf_mmap_close()

   - Add PERF_EV_CAP_SIBLING, to denote that sibling events should be
     closed if the leader is removed.

   - Smaller fixes and updates"

* tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
  perf/core: Fix race in the perf_mmap_close() function
  perf/x86: Fix n_metric for cancelled txn
  perf/x86: Fix n_pair for cancelled txn
  x86/events/amd/iommu: Fix sizeof mismatch
  perf/x86/intel: Check perf metrics feature for each CPU
  perf/x86/intel: Fix Ice Lake event constraint table
  perf/x86/intel/uncore: Fix the scale of the IMC free-running events
  perf/x86/intel/uncore: Fix for iio mapping on Skylake Server
  perf/x86/msr: Add Jasper Lake support
  perf/x86/intel: Add Jasper Lake support
  perf/x86/intel/uncore: Reduce the number of CBOX counters
  perf/x86/intel/uncore: Update Ice Lake uncore units
  perf/x86/intel/uncore: Split the Ice Lake and Tiger Lake MSR uncore support
  perf/x86/intel/uncore: Support PCIe3 unit on Snow Ridge
  perf/x86/intel/uncore: Generic support for the PCI sub driver
  perf/x86/intel/uncore: Factor out uncore_pci_pmu_unregister()
  perf/x86/intel/uncore: Factor out uncore_pci_pmu_register()
  perf/x86/intel/uncore: Factor out uncore_pci_find_dev_pmu()
  perf/x86/intel/uncore: Factor out uncore_pci_get_dev_die_info()
  perf/amd/uncore: Inform the user how many counters each uncore PMU has
  ...
Esse commit está contido em:
Linus Torvalds
2020-10-12 14:14:35 -07:00
18 arquivos alterados com 1180 adições e 298 exclusões

Ver arquivo

@@ -383,7 +383,6 @@ static DEFINE_MUTEX(perf_sched_mutex);
static atomic_t perf_sched_count;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
@@ -2134,8 +2133,24 @@ static inline struct list_head *get_event_list(struct perf_event *event)
return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active;
}
/*
* Events that have PERF_EV_CAP_SIBLING require being part of a group and
* cannot exist on their own, schedule them out and move them into the ERROR
* state. Also see _perf_event_enable(), it will not be able to recover
* this ERROR state.
*/
static inline void perf_remove_sibling_event(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
event_sched_out(event, cpuctx, ctx);
perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
}
static void perf_group_detach(struct perf_event *event)
{
struct perf_event *leader = event->group_leader;
struct perf_event *sibling, *tmp;
struct perf_event_context *ctx = event->ctx;
@@ -2154,7 +2169,7 @@ static void perf_group_detach(struct perf_event *event)
/*
* If this is a sibling, remove it from its group.
*/
if (event->group_leader != event) {
if (leader != event) {
list_del_init(&event->sibling_list);
event->group_leader->nr_siblings--;
goto out;
@@ -2167,6 +2182,9 @@ static void perf_group_detach(struct perf_event *event)
*/
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
if (sibling->event_caps & PERF_EV_CAP_SIBLING)
perf_remove_sibling_event(sibling);
sibling->group_leader = sibling;
list_del_init(&sibling->sibling_list);
@@ -2184,10 +2202,10 @@ static void perf_group_detach(struct perf_event *event)
}
out:
perf_event__header_size(event->group_leader);
for_each_sibling_event(tmp, event->group_leader)
for_each_sibling_event(tmp, leader)
perf_event__header_size(tmp);
perf_event__header_size(leader);
}
static bool is_orphaned_event(struct perf_event *event)
@@ -2980,6 +2998,7 @@ static void _perf_event_enable(struct perf_event *event)
raw_spin_lock_irq(&ctx->lock);
if (event->state >= PERF_EVENT_STATE_INACTIVE ||
event->state < PERF_EVENT_STATE_ERROR) {
out:
raw_spin_unlock_irq(&ctx->lock);
return;
}
@@ -2991,8 +3010,16 @@ static void _perf_event_enable(struct perf_event *event)
* has gone back into error state, as distinct from the task having
* been scheduled away before the cross-call arrived.
*/
if (event->state == PERF_EVENT_STATE_ERROR)
if (event->state == PERF_EVENT_STATE_ERROR) {
/*
* Detached SIBLING events cannot leave ERROR state.
*/
if (event->event_caps & PERF_EV_CAP_SIBLING &&
event->group_leader == event)
goto out;
event->state = PERF_EVENT_STATE_OFF;
}
raw_spin_unlock_irq(&ctx->lock);
event_function_call(event, __perf_event_enable, NULL);
@@ -3357,10 +3384,12 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
struct perf_event_context *parent, *next_parent;
struct perf_cpu_context *cpuctx;
int do_switch = 1;
struct pmu *pmu;
if (likely(!ctx))
return;
pmu = ctx->pmu;
cpuctx = __get_cpu_context(ctx);
if (!cpuctx->task_ctx)
return;
@@ -3390,11 +3419,15 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
raw_spin_lock(&ctx->lock);
raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
if (context_equiv(ctx, next_ctx)) {
struct pmu *pmu = ctx->pmu;
WRITE_ONCE(ctx->task, next);
WRITE_ONCE(next_ctx->task, task);
perf_pmu_disable(pmu);
if (cpuctx->sched_cb_usage && pmu->sched_task)
pmu->sched_task(ctx, false);
/*
* PMU specific parts of task perf context can require
* additional synchronization. As an example of such
@@ -3406,6 +3439,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
else
swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
perf_pmu_enable(pmu);
/*
* RCU_INIT_POINTER here is safe because we've not
* modified the ctx and the above modification of
@@ -3428,21 +3463,22 @@ unlock:
if (do_switch) {
raw_spin_lock(&ctx->lock);
perf_pmu_disable(pmu);
if (cpuctx->sched_cb_usage && pmu->sched_task)
pmu->sched_task(ctx, false);
task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
perf_pmu_enable(pmu);
raw_spin_unlock(&ctx->lock);
}
}
static DEFINE_PER_CPU(struct list_head, sched_cb_list);
void perf_sched_cb_dec(struct pmu *pmu)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
this_cpu_dec(perf_sched_cb_usages);
if (!--cpuctx->sched_cb_usage)
list_del(&cpuctx->sched_cb_entry);
--cpuctx->sched_cb_usage;
}
@@ -3450,10 +3486,7 @@ void perf_sched_cb_inc(struct pmu *pmu)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
if (!cpuctx->sched_cb_usage++)
list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
this_cpu_inc(perf_sched_cb_usages);
cpuctx->sched_cb_usage++;
}
/*
@@ -3464,30 +3497,22 @@ void perf_sched_cb_inc(struct pmu *pmu)
* PEBS requires this to provide PID/TID information. This requires we flush
* all queued PEBS records before we context switch to a new task.
*/
static void perf_pmu_sched_task(struct task_struct *prev,
struct task_struct *next,
bool sched_in)
static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in)
{
struct perf_cpu_context *cpuctx;
struct pmu *pmu;
if (prev == next)
pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
if (WARN_ON_ONCE(!pmu->sched_task))
return;
list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(pmu);
if (WARN_ON_ONCE(!pmu->sched_task))
continue;
pmu->sched_task(cpuctx->task_ctx, sched_in);
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(pmu);
pmu->sched_task(cpuctx->task_ctx, sched_in);
perf_pmu_enable(pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
perf_pmu_enable(pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
static void perf_event_switch(struct task_struct *task,
@@ -3512,9 +3537,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
{
int ctxn;
if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(task, next, false);
if (atomic_read(&nr_switch_events))
perf_event_switch(task, next, false);
@@ -3746,10 +3768,14 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
struct task_struct *task)
{
struct perf_cpu_context *cpuctx;
struct pmu *pmu = ctx->pmu;
cpuctx = __get_cpu_context(ctx);
if (cpuctx->task_ctx == ctx)
if (cpuctx->task_ctx == ctx) {
if (cpuctx->sched_cb_usage)
__perf_pmu_sched_task(cpuctx, true);
return;
}
perf_ctx_lock(cpuctx, ctx);
/*
@@ -3759,7 +3785,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
if (!ctx->nr_events)
goto unlock;
perf_pmu_disable(ctx->pmu);
perf_pmu_disable(pmu);
/*
* We want to keep the following priority order:
* cpu pinned (that don't need to move), task pinned,
@@ -3771,7 +3797,11 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
perf_event_sched_in(cpuctx, ctx, task);
perf_pmu_enable(ctx->pmu);
if (cpuctx->sched_cb_usage && pmu->sched_task)
pmu->sched_task(cpuctx->task_ctx, true);
perf_pmu_enable(pmu);
unlock:
perf_ctx_unlock(cpuctx, ctx);
@@ -3814,9 +3844,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
if (atomic_read(&nr_switch_events))
perf_event_switch(task, prev, true);
if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(prev, task, true);
}
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -5869,11 +5896,11 @@ static void perf_pmu_output_stop(struct perf_event *event);
static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
struct perf_buffer *rb = ring_buffer_get(event);
struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb);
bool detach_rest = false;
if (event->pmu->event_unmapped)
event->pmu->event_unmapped(event, vma->vm_mm);
@@ -5904,7 +5931,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
}
atomic_dec(&rb->mmap_count);
if (atomic_dec_and_test(&rb->mmap_count))
detach_rest = true;
if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
goto out_put;
@@ -5913,7 +5941,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
/* If there's still other mmap()s of this buffer, we're done. */
if (atomic_read(&rb->mmap_count))
if (!detach_rest)
goto out_put;
/*
@@ -12829,7 +12857,6 @@ static void __init perf_event_init_all_cpus(void)
#ifdef CONFIG_CGROUP_PERF
INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
#endif
INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
}
}