Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf tooling updates from Ingo Molnar:
 "A final batch of perf tooling changes: mostly fixes and small
  improvements"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (29 commits)
  perf session: Add comment for perf_session__register_idle_thread()
  perf thread-stack: Fix thread stack processing for the idle task
  perf thread-stack: Allocate an array of thread stacks
  perf thread-stack: Factor out thread_stack__init()
  perf thread-stack: Allow for a thread stack array
  perf thread-stack: Avoid direct reference to the thread's stack
  perf thread-stack: Tidy thread_stack__bottom() usage
  perf thread-stack: Simplify some code in thread_stack__process()
  tools gpio: Allow overriding CFLAGS
  tools power turbostat: Override CFLAGS assignments and add LDFLAGS to build command
  tools thermal tmon: Allow overriding CFLAGS assignments
  tools power x86_energy_perf_policy: Override CFLAGS assignments and add LDFLAGS to build command
  perf c2c: Increase the HITM ratio limit for displayed cachelines
  perf c2c: Change the default coalesce setup
  perf trace beauty ioctl: Beautify USBDEVFS_ commands
  perf trace beauty: Export function to get the files for a thread
  perf trace: Wire up ioctl's USBDEBFS_ cmd table generator
  perf beauty ioctl: Add generator for USBDEVFS_ ioctl commands
  tools headers uapi: Grab a copy of usbdevice_fs.h
  perf trace: Store the major number for a file when storing its pathname
  ...
This commit is contained in:
Linus Torvalds
2019-01-06 16:30:14 -08:00
27 changed files with 623 additions and 169 deletions

View File

@@ -13,3 +13,11 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
*lenp = 0;
return "?";
}
__weak
int arch_is_branch(const unsigned char *buf __maybe_unused,
size_t len __maybe_unused,
int x86_64 __maybe_unused)
{
return 0;
}

View File

@@ -20,4 +20,6 @@ struct perf_insn {
const char *dump_insn(struct perf_insn *x, u64 ip,
u8 *inbuf, int inlen, int *lenp);
int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
#endif

View File

@@ -451,7 +451,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
continue;
intel_bts_get_branch_type(btsq, branch);
if (btsq->bts->synth_opts.thread_stack)
thread_stack__event(thread, btsq->sample_flags,
thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
le64_to_cpu(branch->from),
le64_to_cpu(branch->to),
btsq->intel_pt_insn.length,
@@ -523,7 +523,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
!btsq->bts->synth_opts.thread_stack && thread &&
(!old_buffer || btsq->bts->sampling_mode ||
(btsq->bts->snapshot_mode && !buffer->consecutive)))
thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
err = intel_bts_process_buffer(btsq, buffer, thread);

View File

@@ -180,6 +180,14 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
return 0;
}
int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
{
struct intel_pt_insn in;
if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
return -1;
return in.branch != INTEL_PT_BR_NO_BRANCH;
}
const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
u8 *inbuf, int inlen, int *lenp)
{

View File

@@ -1174,7 +1174,7 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
intel_pt_prep_b_sample(pt, ptq, event, sample);
if (pt->synth_opts.callchain) {
thread_stack__sample(ptq->thread, ptq->chain,
thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
pt->synth_opts.callchain_sz + 1,
sample->ip, pt->kernel_start);
sample->callchain = ptq->chain;
@@ -1526,11 +1526,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return 0;
if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
state->to_ip, ptq->insn_len,
state->trace_nr);
else
thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
if (pt->sample_branches) {
err = intel_pt_synth_branch_sample(ptq);

View File

@@ -939,7 +939,8 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
file = PyFile_FromFile(fp, "perf", "r", NULL);
#else
file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 1);
file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1,
NULL, NULL, NULL, 0);
#endif
if (file == NULL)
goto free_list;

View File

@@ -1527,6 +1527,13 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
return machine__findnew_thread(&session->machines.host, -1, pid);
}
/*
* Threads are identified by pid and tid, and the idle task has pid == tid == 0.
* So here a single thread is created for that, but actually there is a separate
* idle task per cpu, so there should be one 'struct thread' per cpu, but there
* is only 1. That causes problems for some tools, requiring workarounds. For
* example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
*/
int perf_session__register_idle_thread(struct perf_session *session)
{
struct thread *thread;

View File

@@ -15,6 +15,7 @@
#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
@@ -60,6 +61,7 @@ struct thread_stack_entry {
* @last_time: last timestamp
* @crp: call/return processor
* @comm: current comm
* @arr_sz: size of array if this is the first element of an array
*/
struct thread_stack {
struct thread_stack_entry *stack;
@@ -71,8 +73,19 @@ struct thread_stack {
u64 last_time;
struct call_return_processor *crp;
struct comm *comm;
unsigned int arr_sz;
};
/*
* Assume pid == tid == 0 identifies the idle task as defined by
* perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
* and therefore requires a stack for each cpu.
*/
static inline bool thread_stack__per_cpu(struct thread *thread)
{
return !(thread->tid || thread->pid_);
}
static int thread_stack__grow(struct thread_stack *ts)
{
struct thread_stack_entry *new_stack;
@@ -91,19 +104,14 @@ static int thread_stack__grow(struct thread_stack *ts)
return 0;
}
static struct thread_stack *thread_stack__new(struct thread *thread,
struct call_return_processor *crp)
static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
struct call_return_processor *crp)
{
struct thread_stack *ts;
int err;
ts = zalloc(sizeof(struct thread_stack));
if (!ts)
return NULL;
if (thread_stack__grow(ts)) {
free(ts);
return NULL;
}
err = thread_stack__grow(ts);
if (err)
return err;
if (thread->mg && thread->mg->machine)
ts->kernel_start = machine__kernel_start(thread->mg->machine);
@@ -111,9 +119,72 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
ts->kernel_start = 1ULL << 63;
ts->crp = crp;
return 0;
}
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
struct call_return_processor *crp)
{
struct thread_stack *ts = thread->ts, *new_ts;
unsigned int old_sz = ts ? ts->arr_sz : 0;
unsigned int new_sz = 1;
if (thread_stack__per_cpu(thread) && cpu > 0)
new_sz = roundup_pow_of_two(cpu + 1);
if (!ts || new_sz > old_sz) {
new_ts = calloc(new_sz, sizeof(*ts));
if (!new_ts)
return NULL;
if (ts)
memcpy(new_ts, ts, old_sz * sizeof(*ts));
new_ts->arr_sz = new_sz;
zfree(&thread->ts);
thread->ts = new_ts;
ts = new_ts;
}
if (thread_stack__per_cpu(thread) && cpu > 0 &&
(unsigned int)cpu < ts->arr_sz)
ts += cpu;
if (!ts->stack &&
thread_stack__init(ts, thread, crp))
return NULL;
return ts;
}
static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
struct thread_stack *ts = thread->ts;
if (cpu < 0)
cpu = 0;
if (!ts || (unsigned int)cpu >= ts->arr_sz)
return NULL;
ts += cpu;
if (!ts->stack)
return NULL;
return ts;
}
static inline struct thread_stack *thread__stack(struct thread *thread,
int cpu)
{
if (!thread)
return NULL;
if (thread_stack__per_cpu(thread))
return thread__cpu_stack(thread, cpu);
return thread->ts;
}
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
bool trace_end)
{
@@ -226,25 +297,37 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
int thread_stack__flush(struct thread *thread)
{
if (thread->ts)
return __thread_stack__flush(thread, thread->ts);
struct thread_stack *ts = thread->ts;
unsigned int pos;
int err = 0;
return 0;
if (ts) {
for (pos = 0; pos < ts->arr_sz; pos++) {
int ret = __thread_stack__flush(thread, ts + pos);
if (ret)
err = ret;
}
}
return err;
}
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
u64 to_ip, u16 insn_len, u64 trace_nr)
{
struct thread_stack *ts = thread__stack(thread, cpu);
if (!thread)
return -EINVAL;
if (!thread->ts) {
thread->ts = thread_stack__new(thread, NULL);
if (!thread->ts) {
if (!ts) {
ts = thread_stack__new(thread, cpu, NULL);
if (!ts) {
pr_warning("Out of memory: no thread stack\n");
return -ENOMEM;
}
thread->ts->trace_nr = trace_nr;
ts->trace_nr = trace_nr;
}
/*
@@ -252,14 +335,14 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
* the stack might be completely invalid. Better to report nothing than
* to report something misleading, so flush the stack.
*/
if (trace_nr != thread->ts->trace_nr) {
if (thread->ts->trace_nr)
__thread_stack__flush(thread, thread->ts);
thread->ts->trace_nr = trace_nr;
if (trace_nr != ts->trace_nr) {
if (ts->trace_nr)
__thread_stack__flush(thread, ts);
ts->trace_nr = trace_nr;
}
/* Stop here if thread_stack__process() is in use */
if (thread->ts->crp)
if (ts->crp)
return 0;
if (flags & PERF_IP_FLAG_CALL) {
@@ -270,7 +353,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
ret_addr = from_ip + insn_len;
if (ret_addr == to_ip)
return 0; /* Zero-length calls are excluded */
return thread_stack__push(thread->ts, ret_addr,
return thread_stack__push(ts, ret_addr,
flags & PERF_IP_FLAG_TRACE_END);
} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
/*
@@ -280,32 +363,52 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
* address, so try to pop that. Also, do not expect a call made
* when the trace ended, to return, so pop that.
*/
thread_stack__pop(thread->ts, to_ip);
thread_stack__pop_trace_end(thread->ts);
thread_stack__pop(ts, to_ip);
thread_stack__pop_trace_end(ts);
} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
thread_stack__pop(thread->ts, to_ip);
thread_stack__pop(ts, to_ip);
}
return 0;
}
void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
if (!thread || !thread->ts)
struct thread_stack *ts = thread__stack(thread, cpu);
if (!ts)
return;
if (trace_nr != thread->ts->trace_nr) {
if (thread->ts->trace_nr)
__thread_stack__flush(thread, thread->ts);
thread->ts->trace_nr = trace_nr;
if (trace_nr != ts->trace_nr) {
if (ts->trace_nr)
__thread_stack__flush(thread, ts);
ts->trace_nr = trace_nr;
}
}
static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
__thread_stack__flush(thread, ts);
zfree(&ts->stack);
}
static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
unsigned int arr_sz = ts->arr_sz;
__thread_stack__free(thread, ts);
memset(ts, 0, sizeof(*ts));
ts->arr_sz = arr_sz;
}
void thread_stack__free(struct thread *thread)
{
if (thread->ts) {
__thread_stack__flush(thread, thread->ts);
zfree(&thread->ts->stack);
struct thread_stack *ts = thread->ts;
unsigned int pos;
if (ts) {
for (pos = 0; pos < ts->arr_sz; pos++)
__thread_stack__free(thread, ts + pos);
zfree(&thread->ts);
}
}
@@ -315,9 +418,11 @@ static inline u64 callchain_context(u64 ip, u64 kernel_start)
return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
void thread_stack__sample(struct thread *thread, int cpu,
struct ip_callchain *chain,
size_t sz, u64 ip, u64 kernel_start)
{
struct thread_stack *ts = thread__stack(thread, cpu);
u64 context = callchain_context(ip, kernel_start);
u64 last_context;
size_t i, j;
@@ -330,15 +435,15 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
chain->ips[0] = context;
chain->ips[1] = ip;
if (!thread || !thread->ts) {
if (!ts) {
chain->nr = 2;
return;
}
last_context = context;
for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
ip = ts->stack[ts->cnt - j].ret_addr;
context = callchain_context(ip, kernel_start);
if (context != last_context) {
if (i >= sz - 1)
@@ -449,7 +554,7 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
return 1;
}
static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
static int thread_stack__bottom(struct thread_stack *ts,
struct perf_sample *sample,
struct addr_location *from_al,
struct addr_location *to_al, u64 ref)
@@ -474,7 +579,7 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
if (!cp)
return -ENOMEM;
return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
true, false);
}
@@ -590,24 +695,19 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
struct addr_location *to_al, u64 ref,
struct call_return_processor *crp)
{
struct thread_stack *ts = thread->ts;
struct thread_stack *ts = thread__stack(thread, sample->cpu);
int err = 0;
if (ts) {
if (!ts->crp) {
/* Supersede thread_stack__event() */
thread_stack__free(thread);
thread->ts = thread_stack__new(thread, crp);
if (!thread->ts)
return -ENOMEM;
ts = thread->ts;
ts->comm = comm;
}
} else {
thread->ts = thread_stack__new(thread, crp);
if (!thread->ts)
if (ts && !ts->crp) {
/* Supersede thread_stack__event() */
thread_stack__reset(thread, ts);
ts = NULL;
}
if (!ts) {
ts = thread_stack__new(thread, sample->cpu, crp);
if (!ts)
return -ENOMEM;
ts = thread->ts;
ts->comm = comm;
}
@@ -621,8 +721,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
/* If the stack is empty, put the current symbol on the stack */
if (!ts->cnt) {
err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
ref);
err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
if (err)
return err;
}
@@ -671,9 +770,11 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
return err;
}
size_t thread_stack__depth(struct thread *thread)
size_t thread_stack__depth(struct thread *thread, int cpu)
{
if (!thread->ts)
struct thread_stack *ts = thread__stack(thread, cpu);
if (!ts)
return 0;
return thread->ts->cnt;
return ts->cnt;
}

View File

@@ -80,14 +80,14 @@ struct call_return_processor {
void *data;
};
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
u64 to_ip, u16 insn_len, u64 trace_nr);
void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
size_t sz, u64 ip, u64 kernel_start);
int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
size_t thread_stack__depth(struct thread *thread);
size_t thread_stack__depth(struct thread *thread, int cpu);
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),