Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next
Pull perf updates from Ingo Molnar:
 "The tooling changes maintained by Jiri Olsa while Arnaldo is on
  vacation:

  User visible changes:

   - Add -F option for specifying output fields (Namhyung Kim)

   - Propagate exit status of a command line workload for record command
     (Namhyung Kim)

   - Use tid for finding thread (Namhyung Kim)

   - Clarify the output of 'perf sched map', plus small sched command
     fixes (Dongsheng Yang)

   - Wire up perf_regs and unwind support for ARM64 (Jean Pihet)

   - Factor out hists statistics counts processing, which in turn also
     fixes several bugs in the TUI report command (Namhyung Kim)

   - Add --percentage option to control absolute/relative percentage
     output (Namhyung Kim)

   - Add --list-cmds to 'kmem', 'mem', 'lock' and 'sched', for use by
     completion scripts (Ramkumar Ramachandra)

  Development/infrastructure changes and fixes:

   - Android related fixes for pager and map dso resolving (Michael
     Lentine)

   - Add libdw DWARF post unwind support for ARM (Jean Pihet)

   - Consolidate types.h for ARM and ARM64 (Jean Pihet)

   - Fix possible null pointer dereference in session.c (Masanari Iida)

   - Cleanup: remove unused variables in map_switch_event() (Dongsheng
     Yang)

   - Remove nr_state_machine_bugs in perf latency (Dongsheng Yang)

   - Remove usage of trace_sched_wakeup(.success) (Peter Zijlstra)

   - Cleanups for the perf.h header (Jiri Olsa)

   - Consolidate types.h and export.h within tools (Borislav Petkov)

   - Move the u64_swap union to its single user's header, evsel.h
     (Borislav Petkov)

   - Fix s390 to properly parse tracepoints, plus test code (Alexander
     Yarygin)

   - Handle EINTR error for readn/writen (Namhyung Kim)

   - Add a test case for hists filtering (Namhyung Kim)

   - Share map_groups among threads of the same group (Arnaldo Carvalho
     de Melo, Jiri Olsa)

   - Make some code (the cpu node map and the report parse callchain
     callback) global, to be usable by upcoming changes (Don Zickus)

   - Fix a pmu object compilation error (Jiri Olsa)

  Kernel side changes:

   - Intrusive uprobes fixes from Oleg Nesterov: since the interface is
     admin-only, and the bug only affects user-space ("any probed
     jmp/call can kill the application"), we queued these fixes via the
     development tree, as a special exception.

   - More fuzzer-motivated race fixes, with related refactoring and
     robustness improvements.

   - Allow PMU drivers to be built as modules.  (No actual module yet,
     because the x86 Intel uncore module wasn't ready in time for
     this.)"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (114 commits)
  perf tools: Add automatic remapping of Android libraries
  perf tools: Add cat as fallback pager
  perf tests: Add a testcase for histogram output sorting
  perf tests: Factor out print_hists_*()
  perf tools: Introduce reset_output_field()
  perf tools: Get rid of obsolete hist_entry__sort_list
  perf hists: Reset width of output fields with header length
  perf tools: Skip elided sort entries
  perf top: Add --fields option to specify output fields
  perf report/tui: Fix a bug when --fields/sort is given
  perf tools: Add ->sort() member to struct sort_entry
  perf report: Add -F option to specify output fields
  perf tools: Call perf_hpp__init() before setting up GUI browsers
  perf tools: Consolidate management of default sort orders
  perf tools: Allow hpp fields to be sort keys
  perf ui: Get rid of callback from __hpp__fmt()
  perf tools: Consolidate output field handling to hpp format routines
  perf tools: Use hpp formats to sort final output
  perf tools: Support event grouping in hpp ->sort()
  perf tools: Use hpp formats to sort hist entries
  ...
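The exports added below (perf_pmu_register(), perf_pmu_unregister(),
__hrtimer_start_range_ns()) plus the new pmu->module refcounting are
what make modular PMU drivers possible. A minimal sketch of such a
driver, assuming the post-merge API; the demo_* names are hypothetical
and the callbacks are stubs:

	#include <linux/module.h>
	#include <linux/perf_event.h>

	static int demo_event_init(struct perf_event *event)
	{
		/* Only claim events of our dynamically assigned type */
		if (event->attr.type != event->pmu->type)
			return -ENOENT;
		return 0;
	}

	static int demo_add(struct perf_event *event, int flags) { return 0; }
	static void demo_del(struct perf_event *event, int flags) { }
	static void demo_start(struct perf_event *event, int flags) { }
	static void demo_stop(struct perf_event *event, int flags) { }
	static void demo_read(struct perf_event *event) { }

	static struct pmu demo_pmu = {
		.module		= THIS_MODULE,	/* lets perf_init_event() pin us */
		.task_ctx_nr	= perf_sw_context,
		.event_init	= demo_event_init,
		.add		= demo_add,
		.del		= demo_del,
		.start		= demo_start,
		.stop		= demo_stop,
		.read		= demo_read,
	};

	static int __init demo_init(void)
	{
		/* -1: ask the core to allocate a dynamic PMU type id */
		return perf_pmu_register(&demo_pmu, "demo", -1);
	}

	static void __exit demo_exit(void)
	{
		perf_pmu_unregister(&demo_pmu);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");

With .module set, perf_init_event() takes a module reference via
try_module_get() for every live event and __free_event() drops it, so
the module cannot be unloaded while one of its events still exists.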
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -39,6 +39,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
 #include <linux/cgroup.h>
+#include <linux/module.h>
 
 #include "internal.h"
 
@@ -1677,6 +1678,8 @@ event_sched_in(struct perf_event *event,
 	u64 tstamp = perf_event_time(event);
 	int ret = 0;
 
+	lockdep_assert_held(&ctx->lock);
+
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;
 
@@ -3244,9 +3247,13 @@ static void __free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
+	if (event->pmu)
+		module_put(event->pmu->module);
+
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
 
-static void free_event(struct perf_event *event)
+static void _free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending);
 
@@ -3267,42 +3274,31 @@ static void free_event(struct perf_event *event)
 	if (is_cgroup_event(event))
 		perf_detach_cgroup(event);
 
 	__free_event(event);
 }
 
-int perf_event_release_kernel(struct perf_event *event)
+/*
+ * Used to free events which have a known refcount of 1, such as in error paths
+ * where the event isn't exposed yet and inherited events.
+ */
+static void free_event(struct perf_event *event)
 {
-	struct perf_event_context *ctx = event->ctx;
+	if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
+				"unexpected event refcount: %ld; ptr=%p\n",
+				atomic_long_read(&event->refcount), event)) {
+		/* leak to avoid use-after-free */
+		return;
+	}
 
-	WARN_ON_ONCE(ctx->parent_ctx);
-	/*
-	 * There are two ways this annotation is useful:
-	 *
-	 * 1) there is a lock recursion from perf_event_exit_task
-	 *    see the comment there.
-	 *
-	 * 2) there is a lock-inversion with mmap_sem through
-	 *    perf_event_read_group(), which takes faults while
-	 *    holding ctx->mutex, however this is called after
-	 *    the last filedesc died, so there is no possibility
-	 *    to trigger the AB-BA case.
-	 */
-	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
-	perf_remove_from_context(event, true);
-	mutex_unlock(&ctx->mutex);
-
-	free_event(event);
-
-	return 0;
+	_free_event(event);
 }
-EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 
 /*
  * Called when the last reference to the file is gone.
  */
 static void put_event(struct perf_event *event)
 {
+	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *owner;
 
 	if (!atomic_long_dec_and_test(&event->refcount))
@@ -3341,9 +3337,33 @@ static void put_event(struct perf_event *event)
 		put_task_struct(owner);
 	}
 
-	perf_event_release_kernel(event);
+	WARN_ON_ONCE(ctx->parent_ctx);
+	/*
+	 * There are two ways this annotation is useful:
+	 *
+	 * 1) there is a lock recursion from perf_event_exit_task
+	 *    see the comment there.
+	 *
+	 * 2) there is a lock-inversion with mmap_sem through
+	 *    perf_event_read_group(), which takes faults while
+	 *    holding ctx->mutex, however this is called after
+	 *    the last filedesc died, so there is no possibility
+	 *    to trigger the AB-BA case.
+	 */
+	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
+	perf_remove_from_context(event, true);
+	mutex_unlock(&ctx->mutex);
+
+	_free_event(event);
+}
+
+int perf_event_release_kernel(struct perf_event *event)
+{
+	put_event(event);
+	return 0;
 }
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 
 static int perf_release(struct inode *inode, struct file *file)
 {
 	put_event(file->private_data);
@@ -6578,6 +6598,7 @@ free_pdc:
 	free_percpu(pmu->pmu_disable_count);
 	goto unlock;
 }
+EXPORT_SYMBOL_GPL(perf_pmu_register);
 
 void perf_pmu_unregister(struct pmu *pmu)
 {
@@ -6599,6 +6620,7 @@ void perf_pmu_unregister(struct pmu *pmu)
 	put_device(pmu->dev);
 	free_pmu_context(pmu);
 }
+EXPORT_SYMBOL_GPL(perf_pmu_unregister);
 
 struct pmu *perf_init_event(struct perf_event *event)
 {
@@ -6612,6 +6634,10 @@ struct pmu *perf_init_event(struct perf_event *event)
 	pmu = idr_find(&pmu_idr, event->attr.type);
 	rcu_read_unlock();
 	if (pmu) {
+		if (!try_module_get(pmu->module)) {
+			pmu = ERR_PTR(-ENODEV);
+			goto unlock;
+		}
 		event->pmu = pmu;
 		ret = pmu->event_init(event);
 		if (ret)
@@ -6620,6 +6646,10 @@ struct pmu *perf_init_event(struct perf_event *event)
 	}
 
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		if (!try_module_get(pmu->module)) {
+			pmu = ERR_PTR(-ENODEV);
+			goto unlock;
+		}
 		event->pmu = pmu;
 		ret = pmu->event_init(event);
 		if (!ret)
@@ -6798,6 +6828,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 err_pmu:
 	if (event->destroy)
 		event->destroy(event);
+	module_put(pmu->module);
 err_ns:
 	if (event->ns)
 		put_pid_ns(event->ns);
@@ -7067,20 +7098,26 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	}
 
+	if (task && group_leader &&
+	    group_leader->attr.inherit != attr.inherit) {
+		err = -EINVAL;
+		goto err_task;
+	}
+
 	get_online_cpus();
 
 	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
 				 NULL, NULL);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
-		goto err_task;
+		goto err_cpus;
 	}
 
 	if (flags & PERF_FLAG_PID_CGROUP) {
 		err = perf_cgroup_connect(pid, event, &attr, group_leader);
 		if (err) {
 			__free_event(event);
-			goto err_task;
+			goto err_cpus;
 		}
 	}
 
@@ -7242,8 +7279,9 @@ err_context:
 	put_ctx(ctx);
 err_alloc:
 	free_event(event);
-err_task:
+err_cpus:
 	put_online_cpus();
+err_task:
 	if (task)
 		put_task_struct(task);
 err_group_fd:
@@ -7379,7 +7417,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 			 struct perf_event_context *child_ctx,
 			 struct task_struct *child)
 {
-	perf_remove_from_context(child_event, !!child_event->parent);
+	perf_remove_from_context(child_event, true);
 
 	/*
 	 * It can happen that the parent exits first, and has events
@@ -7394,7 +7432,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-	struct perf_event *child_event, *tmp;
+	struct perf_event *child_event;
 	struct perf_event_context *child_ctx;
 	unsigned long flags;
 
@@ -7448,24 +7486,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 */
 	mutex_lock(&child_ctx->mutex);
 
-again:
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
-				 group_entry)
+	list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
-				 group_entry)
-		__perf_event_exit_task(child_event, child_ctx, child);
-
-	/*
-	 * If the last event was a group event, it will have appended all
-	 * its siblings to the list, but we obtained 'tmp' before that which
-	 * will still point to the list head terminating the iteration.
-	 */
-	if (!list_empty(&child_ctx->pinned_groups) ||
-	    !list_empty(&child_ctx->flexible_groups))
-		goto again;
-
 	mutex_unlock(&child_ctx->mutex);
 
 	put_ctx(child_ctx);
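The kernel/events/core.c hunks above layer the teardown: _free_event()
is the unconditional worker, free_event() is only legal when the caller
knows it holds the sole reference (error paths, events not yet exposed,
inherited events), and put_event() is the normal path that drops a
reference and, on the last one, detaches the event from its context
before freeing. A userspace sketch of that refcount discipline, using
C11 atomics in place of the kernel's atomic_long_t (all names
illustrative, not kernel API):

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct event {
		atomic_long refcount;
	};

	static void _free_event(struct event *e)
	{
		free(e);	/* the kernel path also unaccounts, RCU-frees, ... */
	}

	/* Free an object whose refcount is known to be exactly 1. */
	static void free_event(struct event *e)
	{
		long one = 1;

		if (!atomic_compare_exchange_strong(&e->refcount, &one, 0)) {
			/* unexpected reference: leak rather than use-after-free */
			fprintf(stderr, "unexpected event refcount: %ld\n", one);
			return;
		}
		_free_event(e);
	}

	/* Drop one reference; the last one tears the object down. */
	static void put_event(struct event *e)
	{
		if (atomic_fetch_sub(&e->refcount, 1) == 1)
			_free_event(e);
	}

	int main(void)
	{
		struct event *e = malloc(sizeof(*e));

		atomic_init(&e->refcount, 2);
		put_event(e);	/* one reference left */
		put_event(e);	/* last reference: freed */
		return 0;
	}

The cmpxchg(1 -> 0) in free_event() turns a refcounting bug into a loud
warning plus a deliberate leak instead of a use-after-free.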
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -60,8 +60,6 @@ static struct percpu_rw_semaphore dup_mmap_sem;
 
 /* Have a copy of original instruction */
 #define UPROBE_COPY_INSN	0
-/* Can skip singlestep */
-#define UPROBE_SKIP_SSTEP	1
 
 struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
@@ -491,12 +489,9 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	uprobe->offset = offset;
 	init_rwsem(&uprobe->register_rwsem);
 	init_rwsem(&uprobe->consumer_rwsem);
-	/* For now assume that the instruction need not be single-stepped */
-	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
 
 	/* add to uprobes_tree, sorted on inode:offset */
 	cur_uprobe = insert_uprobe(uprobe);
-
 	/* a uprobe exists for this inode:offset combination */
 	if (cur_uprobe) {
 		kfree(uprobe);
@@ -1628,20 +1623,6 @@ bool uprobe_deny_signal(void)
 	return true;
 }
 
-/*
- * Avoid singlestepping the original instruction if the original instruction
- * is a NOP or can be emulated.
- */
-static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
-{
-	if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
-		if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
-			return true;
-		clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
-	}
-	return false;
-}
-
 static void mmf_recalc_uprobes(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
@@ -1868,13 +1849,13 @@ static void handle_swbp(struct pt_regs *regs)
 
 	handler_chain(uprobe, regs);
 
-	if (can_skip_sstep(uprobe, regs))
+	if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
 		goto out;
 
 	if (!pre_ssout(uprobe, regs, bp_vaddr))
 		return;
 
-	/* can_skip_sstep() succeeded, or restart if can't singlestep */
+	/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
 out:
 	put_uprobe(uprobe);
 }
@@ -1886,10 +1867,11 @@ out:
 static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 {
 	struct uprobe *uprobe;
+	int err = 0;
 
 	uprobe = utask->active_uprobe;
 	if (utask->state == UTASK_SSTEP_ACK)
-		arch_uprobe_post_xol(&uprobe->arch, regs);
+		err = arch_uprobe_post_xol(&uprobe->arch, regs);
 	else if (utask->state == UTASK_SSTEP_TRAPPED)
 		arch_uprobe_abort_xol(&uprobe->arch, regs);
 	else
@@ -1903,6 +1885,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 	spin_lock_irq(&current->sighand->siglock);
 	recalc_sigpending(); /* see uprobe_deny_signal() */
 	spin_unlock_irq(&current->sighand->siglock);
+
+	if (unlikely(err)) {
+		uprobe_warn(current, "execute the probed insn, sending SIGILL.");
+		force_sig_info(SIGILL, SEND_SIG_FORCED, current);
+	}
 }
 
 /*
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1039,6 +1039,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
 
 /**
  * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU