Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "Kernel side updates:

   - Fix and enhance poll support (Jiri Olsa)

   - Re-enable inheritance optimization (Jiri Olsa)

   - Enhance Intel memory events support (Stephane Eranian)

   - Refactor the Intel uncore driver to be more maintainable (Zheng
     Yan)

   - Enhance and fix Intel CPU and uncore PMU drivers (Peter Zijlstra,
     Andi Kleen)

   - [ plus various smaller fixes/cleanups ]

  User visible tooling updates:

   - Add +field argument support for --field option, so that one can add
     fields to the default list of fields to show, ie now one can just
     do:

         perf report --fields +pid

     And the pid will appear in addition to the default fields (Jiri
     Olsa)

   - Add +field argument support for --sort option (Jiri Olsa)

   - Honour -w in the report tools (report, top), allowing to specify
     the widths for the histogram entries columns (Namhyung Kim)

   - Properly show submicrosecond times in 'perf kvm stat' (Christian
     Borntraeger)

   - Add beautifier for mremap flags param in 'trace' (Alex Snast)

   - perf script: Allow callchains if any event samples them

   - Don't truncate Intel style addresses in 'annotate' (Alex Converse)

   - Allow profiling when kptr_restrict == 1 for non root users, kernel
     samples will just remain unresolved (Andi Kleen)

   - Allow configuring default options for callchains in config file
     (Namhyung Kim)

   - Support operations for shared futexes.  (Davidlohr Bueso)

   - "perf kvm stat report" improvements by Alexander Yarygin:
       -  Save pid string in opts.target.pid
       -  Enable the target.system_wide flag
       -  Unify the title bar output

   - [ plus lots of other fixes and small improvements.  ]

  Tooling infrastructure changes:

   - Refactor unit and scale function parameters for PMU parsing
     routines (Matt Fleming)

   - Improve DSO long names lookup with rbtree, resulting in great
     speedup for workloads with lots of DSOs (Waiman Long)

   - We were not handling POLLHUP notifications for event file
     descriptors

     Fix it by filtering entries in the events file descriptor array
     after poll() returns, refcounting mmaps so that when the last fd
     pointing to a perf mmap goes away we do the unmap (Arnaldo Carvalho
     de Melo)

   - Intel PT prep work, from Adrian Hunter, including:
       - Let a user specify a PMU event without any config terms
       - Add perf-with-kcore script
       - Let default config be defined for a PMU
       - Add perf_pmu__scan_file()
       - Add a 'perf test' for tracking with sched_switch
       - Add 'flush' callback to scripting API

   - Use ring buffer consume method to look like other tools (Arnaldo
     Carvalho de Melo)

   - hists browser (used in top and report) refactorings, getting rid of
     unused variables and reducing source code size by handling similar
     cases in a fewer functions (Namhyung Kim).

   - Replace thread unsafe strerror() with strerror_r() accross the
     whole tools/perf/ tree (Masami Hiramatsu)

   - Rename ordered_samples to ordered_events and allow setting a queue
     size for ordering events (Jiri Olsa)

   - [ plus lots of fixes, cleanups and other improvements ]"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (198 commits)
  perf/x86: Tone down kernel messages when the PMU check fails in a virtual environment
  perf/x86/intel/uncore: Fix minor race in box set up
  perf record: Fix error message for --filter option not coming after tracepoint
  perf tools: Fix build breakage on arm64 targets
  perf symbols: Improve DSO long names lookup speed with rbtree
  perf symbols: Encapsulate dsos list head into struct dsos
  perf bench futex: Sanitize -q option in requeue
  perf bench futex: Support operations for shared futexes
  perf trace: Fix mmap return address truncation to 32-bit
  perf tools: Refactor unit and scale function parameters
  perf tools: Fix line number in the config file error message
  perf tools: Convert {record,top}.call-graph option to call-graph.record-mode
  perf tools: Introduce perf_callchain_config()
  perf callchain: Move some parser functions to callchain.c
  perf tools: Move callchain config from record_opts to callchain_param
  perf hists browser: Fix callchain print bug on TUI
  perf tools: Use ACCESS_ONCE() instead of volatile cast
  perf tools: Modify error code for when perf_session__new() fails
  perf tools: Fix perf record as non root with kptr_restrict == 1
  perf stat: Fix --per-core on multi socket systems
  ...
This commit is contained in:
Linus Torvalds
2014-10-13 15:58:15 +02:00
کامیت 9d9420f120
134فایلهای تغییر یافته به همراه8844 افزوده شده و 5129 حذف شده

مشاهده پرونده

@@ -52,7 +52,7 @@ static void release_callchain_buffers(void)
struct callchain_cpus_entries *entries;
entries = callchain_cpus_entries;
rcu_assign_pointer(callchain_cpus_entries, NULL);
RCU_INIT_POINTER(callchain_cpus_entries, NULL);
call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
}

مشاهده پرونده

@@ -47,6 +47,8 @@
#include <asm/irq_regs.h>
static struct workqueue_struct *perf_wq;
struct remote_function_call {
struct task_struct *p;
int (*func)(void *info);
@@ -120,6 +122,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
return data.ret;
}
#define EVENT_OWNER_KERNEL ((void *) -1)
static bool is_kernel_event(struct perf_event *event)
{
return event->owner == EVENT_OWNER_KERNEL;
}
#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
PERF_FLAG_FD_OUTPUT |\
PERF_FLAG_PID_CGROUP |\
@@ -1370,6 +1379,45 @@ out:
perf_event__header_size(tmp);
}
/*
* User event without the task.
*/
static bool is_orphaned_event(struct perf_event *event)
{
return event && !is_kernel_event(event) && !event->owner;
}
/*
* Event has a parent but parent's task finished and it's
* alive only because of children holding refference.
*/
static bool is_orphaned_child(struct perf_event *event)
{
return is_orphaned_event(event->parent);
}
static void orphans_remove_work(struct work_struct *work);
static void schedule_orphans_remove(struct perf_event_context *ctx)
{
if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
return;
if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
get_ctx(ctx);
ctx->orphans_remove_sched = true;
}
}
static int __init perf_workqueue_init(void)
{
perf_wq = create_singlethread_workqueue("perf");
WARN(!perf_wq, "failed to create perf workqueue\n");
return perf_wq ? 0 : -1;
}
core_initcall(perf_workqueue_init);
static inline int
event_filter_match(struct perf_event *event)
{
@@ -1419,6 +1467,9 @@ event_sched_out(struct perf_event *event,
if (event->attr.exclusive || !cpuctx->active_oncpu)
cpuctx->exclusive = 0;
if (is_orphaned_child(event))
schedule_orphans_remove(ctx);
perf_pmu_enable(event->pmu);
}
@@ -1726,6 +1777,9 @@ event_sched_in(struct perf_event *event,
if (event->attr.exclusive)
cpuctx->exclusive = 1;
if (is_orphaned_child(event))
schedule_orphans_remove(ctx);
out:
perf_pmu_enable(event->pmu);
@@ -2326,7 +2380,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
next_parent = rcu_dereference(next_ctx->parent_ctx);
/* If neither context have a parent context; they cannot be clones. */
if (!parent || !next_parent)
if (!parent && !next_parent)
goto unlock;
if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
@@ -3073,6 +3127,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
INIT_LIST_HEAD(&ctx->flexible_groups);
INIT_LIST_HEAD(&ctx->event_list);
atomic_set(&ctx->refcount, 1);
INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
}
static struct perf_event_context *
@@ -3318,16 +3373,12 @@ static void free_event(struct perf_event *event)
}
/*
* Called when the last reference to the file is gone.
* Remove user event from the owner task.
*/
static void put_event(struct perf_event *event)
static void perf_remove_from_owner(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *owner;
if (!atomic_long_dec_and_test(&event->refcount))
return;
rcu_read_lock();
owner = ACCESS_ONCE(event->owner);
/*
@@ -3360,6 +3411,20 @@ static void put_event(struct perf_event *event)
mutex_unlock(&owner->perf_event_mutex);
put_task_struct(owner);
}
}
/*
* Called when the last reference to the file is gone.
*/
static void put_event(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
if (!atomic_long_dec_and_test(&event->refcount))
return;
if (!is_kernel_event(event))
perf_remove_from_owner(event);
WARN_ON_ONCE(ctx->parent_ctx);
/*
@@ -3394,6 +3459,42 @@ static int perf_release(struct inode *inode, struct file *file)
return 0;
}
/*
* Remove all orphanes events from the context.
*/
static void orphans_remove_work(struct work_struct *work)
{
struct perf_event_context *ctx;
struct perf_event *event, *tmp;
ctx = container_of(work, struct perf_event_context,
orphans_remove.work);
mutex_lock(&ctx->mutex);
list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
struct perf_event *parent_event = event->parent;
if (!is_orphaned_child(event))
continue;
perf_remove_from_context(event, true);
mutex_lock(&parent_event->child_mutex);
list_del_init(&event->child_list);
mutex_unlock(&parent_event->child_mutex);
free_event(event);
put_event(parent_event);
}
raw_spin_lock_irq(&ctx->lock);
ctx->orphans_remove_sched = false;
raw_spin_unlock_irq(&ctx->lock);
mutex_unlock(&ctx->mutex);
put_ctx(ctx);
}
u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
{
struct perf_event *child;
@@ -3491,6 +3592,19 @@ static int perf_event_read_one(struct perf_event *event,
return n * sizeof(u64);
}
static bool is_event_hup(struct perf_event *event)
{
bool no_children;
if (event->state != PERF_EVENT_STATE_EXIT)
return false;
mutex_lock(&event->child_mutex);
no_children = list_empty(&event->child_list);
mutex_unlock(&event->child_mutex);
return no_children;
}
/*
* Read the performance event - simple non blocking version for now
*/
@@ -3532,7 +3646,12 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
{
struct perf_event *event = file->private_data;
struct ring_buffer *rb;
unsigned int events = POLL_HUP;
unsigned int events = POLLHUP;
poll_wait(file, &event->waitq, wait);
if (is_event_hup(event))
return events;
/*
* Pin the event->rb by taking event->mmap_mutex; otherwise
@@ -3543,9 +3662,6 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
if (rb)
events = atomic_xchg(&rb->poll, 0);
mutex_unlock(&event->mmap_mutex);
poll_wait(file, &event->waitq, wait);
return events;
}
@@ -5809,7 +5925,7 @@ static void swevent_hlist_release(struct swevent_htable *swhash)
if (!hlist)
return;
rcu_assign_pointer(swhash->swevent_hlist, NULL);
RCU_INIT_POINTER(swhash->swevent_hlist, NULL);
kfree_rcu(hlist, rcu_head);
}
@@ -7392,6 +7508,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
goto err;
}
/* Mark owner so we could distinguish it from user events. */
event->owner = EVENT_OWNER_KERNEL;
account_event(event);
ctx = find_get_context(event->pmu, task, cpu);
@@ -7478,6 +7597,12 @@ static void sync_child_event(struct perf_event *child_event,
list_del_init(&child_event->child_list);
mutex_unlock(&parent_event->child_mutex);
/*
* Make sure user/parent get notified, that we just
* lost one event.
*/
perf_event_wakeup(parent_event);
/*
* Release the parent event, if this was the last
* reference to it.
@@ -7512,6 +7637,9 @@ __perf_event_exit_task(struct perf_event *child_event,
if (child_event->parent) {
sync_child_event(child_event, child);
free_event(child_event);
} else {
child_event->state = PERF_EVENT_STATE_EXIT;
perf_event_wakeup(child_event);
}
}
@@ -7695,6 +7823,7 @@ inherit_event(struct perf_event *parent_event,
struct perf_event *group_leader,
struct perf_event_context *child_ctx)
{
enum perf_event_active_state parent_state = parent_event->state;
struct perf_event *child_event;
unsigned long flags;
@@ -7715,7 +7844,8 @@ inherit_event(struct perf_event *parent_event,
if (IS_ERR(child_event))
return child_event;
if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
if (is_orphaned_event(parent_event) ||
!atomic_long_inc_not_zero(&parent_event->refcount)) {
free_event(child_event);
return NULL;
}
@@ -7727,7 +7857,7 @@ inherit_event(struct perf_event *parent_event,
* not its attr.disabled bit. We hold the parent's mutex,
* so we won't race with perf_event_{en, dis}able_family.
*/
if (parent_event->state >= PERF_EVENT_STATE_INACTIVE)
if (parent_state >= PERF_EVENT_STATE_INACTIVE)
child_event->state = PERF_EVENT_STATE_INACTIVE;
else
child_event->state = PERF_EVENT_STATE_OFF;