Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next

Pull perf updates from Ingo Molnar:
 "The tooling changes maintained by Jiri Olsa until Arnaldo is on
  vacation:

  User visible changes:
   - Add -F option for specifying output fields (Namhyung Kim)
   - Propagate exit status of a command line workload for record command
     (Namhyung Kim)
   - Use tid for finding thread (Namhyung Kim)
   - Clarify the output of perf sched map plus small sched command
     fixes (Dongsheng Yang)
   - Wire up perf_regs and unwind support for ARM64 (Jean Pihet)
   - Factor hists statistics counts processing which in turn also fixes
     several bugs in TUI report command (Namhyung Kim)
   - Add --percentage option to control absolute/relative percentage
     output (Namhyung Kim)
   - Add --list-cmds to 'kmem', 'mem', 'lock' and 'sched', for use by
     completion scripts (Ramkumar Ramachandra)

  Development/infrastructure changes and fixes:
   - Android related fixes for pager and map dso resolving (Michael
     Lentine)
   - Add libdw DWARF post unwind support for ARM (Jean Pihet)
   - Consolidate types.h for ARM and ARM64 (Jean Pihet)
   - Fix possible null pointer dereference in session.c (Masanari Iida)
   - Cleanup, remove unused variables in map_switch_event() (Dongsheng
     Yang)
   - Remove nr_state_machine_bugs in perf latency (Dongsheng Yang)
   - Remove usage of trace_sched_wakeup(.success) (Peter Zijlstra)
   - Cleanups for perf.h header (Jiri Olsa)
   - Consolidate types.h and export.h within tools (Borislav Petkov)
   - Move u64_swap union to its single user's header, evsel.h (Borislav
     Petkov)
   - Fix for s390 to properly parse tracepoints plus test code
     (Alexander Yarygin)
   - Handle EINTR error for readn/writen (Namhyung Kim)
   - Add a test case for hists filtering (Namhyung Kim)
   - Share map_groups among threads of the same group (Arnaldo Carvalho
     de Melo, Jiri Olsa)
   - Making some code (cpu node map and report parse callchain callback)
     global to be usable by upcomming changes (Don Zickus)
   - Fix pmu object compilation error (Jiri Olsa)

  Kernel side changes:
   - intrusive uprobes fixes from Oleg Nesterov.  Since the interface is
     admin-only, and the bug only affects user-space ("any probed
     jmp/call can kill the application"), we queued these fixes via the
     development tree, as a special exception.
   - more fuzzer motivated race fixes and related refactoring and
     robustization.
   - allow PMU drivers to be built as modules.  (No actual module yet,
     because the x86 Intel uncore module wasn't ready in time for this)"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (114 commits)
  perf tools: Add automatic remapping of Android libraries
  perf tools: Add cat as fallback pager
  perf tests: Add a testcase for histogram output sorting
  perf tests: Factor out print_hists_*()
  perf tools: Introduce reset_output_field()
  perf tools: Get rid of obsolete hist_entry__sort_list
  perf hists: Reset width of output fields with header length
  perf tools: Skip elided sort entries
  perf top: Add --fields option to specify output fields
  perf report/tui: Fix a bug when --fields/sort is given
  perf tools: Add ->sort() member to struct sort_entry
  perf report: Add -F option to specify output fields
  perf tools: Call perf_hpp__init() before setting up GUI browsers
  perf tools: Consolidate management of default sort orders
  perf tools: Allow hpp fields to be sort keys
  perf ui: Get rid of callback from __hpp__fmt()
  perf tools: Consolidate output field handling to hpp format routines
  perf tools: Use hpp formats to sort final output
  perf tools: Support event grouping in hpp ->sort()
  perf tools: Use hpp formats to sort hist entries
  ...
This commit is contained in:
Linus Torvalds
2014-06-03 13:18:00 -07:00
123 changed files with 4602 additions and 1729 deletions

View File

@@ -39,6 +39,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/mm_types.h>
#include <linux/cgroup.h>
#include <linux/module.h>
#include "internal.h"
@@ -1677,6 +1678,8 @@ event_sched_in(struct perf_event *event,
u64 tstamp = perf_event_time(event);
int ret = 0;
lockdep_assert_held(&ctx->lock);
if (event->state <= PERF_EVENT_STATE_OFF)
return 0;
@@ -3244,9 +3247,13 @@ static void __free_event(struct perf_event *event)
if (event->ctx)
put_ctx(event->ctx);
if (event->pmu)
module_put(event->pmu->module);
call_rcu(&event->rcu_head, free_event_rcu);
}
static void free_event(struct perf_event *event)
static void _free_event(struct perf_event *event)
{
irq_work_sync(&event->pending);
@@ -3267,42 +3274,31 @@ static void free_event(struct perf_event *event)
if (is_cgroup_event(event))
perf_detach_cgroup(event);
__free_event(event);
}
int perf_event_release_kernel(struct perf_event *event)
/*
* Used to free events which have a known refcount of 1, such as in error paths
* where the event isn't exposed yet and inherited events.
*/
static void free_event(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
"unexpected event refcount: %ld; ptr=%p\n",
atomic_long_read(&event->refcount), event)) {
/* leak to avoid use-after-free */
return;
}
WARN_ON_ONCE(ctx->parent_ctx);
/*
* There are two ways this annotation is useful:
*
* 1) there is a lock recursion from perf_event_exit_task
* see the comment there.
*
* 2) there is a lock-inversion with mmap_sem through
* perf_event_read_group(), which takes faults while
* holding ctx->mutex, however this is called after
* the last filedesc died, so there is no possibility
* to trigger the AB-BA case.
*/
mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
perf_remove_from_context(event, true);
mutex_unlock(&ctx->mutex);
free_event(event);
return 0;
_free_event(event);
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
/*
* Called when the last reference to the file is gone.
*/
static void put_event(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *owner;
if (!atomic_long_dec_and_test(&event->refcount))
@@ -3341,9 +3337,33 @@ static void put_event(struct perf_event *event)
put_task_struct(owner);
}
perf_event_release_kernel(event);
WARN_ON_ONCE(ctx->parent_ctx);
/*
* There are two ways this annotation is useful:
*
* 1) there is a lock recursion from perf_event_exit_task
* see the comment there.
*
* 2) there is a lock-inversion with mmap_sem through
* perf_event_read_group(), which takes faults while
* holding ctx->mutex, however this is called after
* the last filedesc died, so there is no possibility
* to trigger the AB-BA case.
*/
mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
perf_remove_from_context(event, true);
mutex_unlock(&ctx->mutex);
_free_event(event);
}
int perf_event_release_kernel(struct perf_event *event)
{
put_event(event);
return 0;
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
static int perf_release(struct inode *inode, struct file *file)
{
put_event(file->private_data);
@@ -6578,6 +6598,7 @@ free_pdc:
free_percpu(pmu->pmu_disable_count);
goto unlock;
}
EXPORT_SYMBOL_GPL(perf_pmu_register);
void perf_pmu_unregister(struct pmu *pmu)
{
@@ -6599,6 +6620,7 @@ void perf_pmu_unregister(struct pmu *pmu)
put_device(pmu->dev);
free_pmu_context(pmu);
}
EXPORT_SYMBOL_GPL(perf_pmu_unregister);
struct pmu *perf_init_event(struct perf_event *event)
{
@@ -6612,6 +6634,10 @@ struct pmu *perf_init_event(struct perf_event *event)
pmu = idr_find(&pmu_idr, event->attr.type);
rcu_read_unlock();
if (pmu) {
if (!try_module_get(pmu->module)) {
pmu = ERR_PTR(-ENODEV);
goto unlock;
}
event->pmu = pmu;
ret = pmu->event_init(event);
if (ret)
@@ -6620,6 +6646,10 @@ struct pmu *perf_init_event(struct perf_event *event)
}
list_for_each_entry_rcu(pmu, &pmus, entry) {
if (!try_module_get(pmu->module)) {
pmu = ERR_PTR(-ENODEV);
goto unlock;
}
event->pmu = pmu;
ret = pmu->event_init(event);
if (!ret)
@@ -6798,6 +6828,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
err_pmu:
if (event->destroy)
event->destroy(event);
module_put(pmu->module);
err_ns:
if (event->ns)
put_pid_ns(event->ns);
@@ -7067,20 +7098,26 @@ SYSCALL_DEFINE5(perf_event_open,
}
}
if (task && group_leader &&
group_leader->attr.inherit != attr.inherit) {
err = -EINVAL;
goto err_task;
}
get_online_cpus();
event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
NULL, NULL);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err_task;
goto err_cpus;
}
if (flags & PERF_FLAG_PID_CGROUP) {
err = perf_cgroup_connect(pid, event, &attr, group_leader);
if (err) {
__free_event(event);
goto err_task;
goto err_cpus;
}
}
@@ -7242,8 +7279,9 @@ err_context:
put_ctx(ctx);
err_alloc:
free_event(event);
err_task:
err_cpus:
put_online_cpus();
err_task:
if (task)
put_task_struct(task);
err_group_fd:
@@ -7379,7 +7417,7 @@ __perf_event_exit_task(struct perf_event *child_event,
struct perf_event_context *child_ctx,
struct task_struct *child)
{
perf_remove_from_context(child_event, !!child_event->parent);
perf_remove_from_context(child_event, true);
/*
* It can happen that the parent exits first, and has events
@@ -7394,7 +7432,7 @@ __perf_event_exit_task(struct perf_event *child_event,
static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
{
struct perf_event *child_event, *tmp;
struct perf_event *child_event;
struct perf_event_context *child_ctx;
unsigned long flags;
@@ -7448,24 +7486,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
*/
mutex_lock(&child_ctx->mutex);
again:
list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
group_entry)
list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
__perf_event_exit_task(child_event, child_ctx, child);
list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
group_entry)
__perf_event_exit_task(child_event, child_ctx, child);
/*
* If the last event was a group event, it will have appended all
* its siblings to the list, but we obtained 'tmp' before that which
* will still point to the list head terminating the iteration.
*/
if (!list_empty(&child_ctx->pinned_groups) ||
!list_empty(&child_ctx->flexible_groups))
goto again;
mutex_unlock(&child_ctx->mutex);
put_ctx(child_ctx);

View File

@@ -60,8 +60,6 @@ static struct percpu_rw_semaphore dup_mmap_sem;
/* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0
/* Can skip singlestep */
#define UPROBE_SKIP_SSTEP 1
struct uprobe {
struct rb_node rb_node; /* node in the rb tree */
@@ -491,12 +489,9 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
uprobe->offset = offset;
init_rwsem(&uprobe->register_rwsem);
init_rwsem(&uprobe->consumer_rwsem);
/* For now assume that the instruction need not be single-stepped */
__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
/* add to uprobes_tree, sorted on inode:offset */
cur_uprobe = insert_uprobe(uprobe);
/* a uprobe exists for this inode:offset combination */
if (cur_uprobe) {
kfree(uprobe);
@@ -1628,20 +1623,6 @@ bool uprobe_deny_signal(void)
return true;
}
/*
* Avoid singlestepping the original instruction if the original instruction
* is a NOP or can be emulated.
*/
static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
{
if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
return true;
clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
}
return false;
}
static void mmf_recalc_uprobes(struct mm_struct *mm)
{
struct vm_area_struct *vma;
@@ -1868,13 +1849,13 @@ static void handle_swbp(struct pt_regs *regs)
handler_chain(uprobe, regs);
if (can_skip_sstep(uprobe, regs))
if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
goto out;
if (!pre_ssout(uprobe, regs, bp_vaddr))
return;
/* can_skip_sstep() succeeded, or restart if can't singlestep */
/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
out:
put_uprobe(uprobe);
}
@@ -1886,10 +1867,11 @@ out:
static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
{
struct uprobe *uprobe;
int err = 0;
uprobe = utask->active_uprobe;
if (utask->state == UTASK_SSTEP_ACK)
arch_uprobe_post_xol(&uprobe->arch, regs);
err = arch_uprobe_post_xol(&uprobe->arch, regs);
else if (utask->state == UTASK_SSTEP_TRAPPED)
arch_uprobe_abort_xol(&uprobe->arch, regs);
else
@@ -1903,6 +1885,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
spin_lock_irq(&current->sighand->siglock);
recalc_sigpending(); /* see uprobe_deny_signal() */
spin_unlock_irq(&current->sighand->siglock);
if (unlikely(err)) {
uprobe_warn(current, "execute the probed insn, sending SIGILL.");
force_sig_info(SIGILL, SEND_SIG_FORCED, current);
}
}
/*

View File

@@ -1039,6 +1039,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
return ret;
}
EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
/**
* hrtimer_start_range_ns - (re)start an hrtimer on the current CPU