Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates and fixes from Ingo Molnar:
 "These are almost all tooling updates: 'perf top', 'perf trace' and
  'perf script' fixes and updates, an UAPI header sync with the merge
  window versions, license marker updates, much improved Sparc support
  from David Miller, and a number of fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (66 commits)
  perf intel-pt/bts: Calculate cpumode for synthesized samples
  perf intel-pt: Insert callchain context into synthesized callchains
  perf tools: Don't clone maps from parent when synthesizing forks
  perf top: Start display thread earlier
  tools headers uapi: Update linux/if_link.h header copy
  tools headers uapi: Update linux/netlink.h header copy
  tools headers: Sync the various kvm.h header copies
  tools include uapi: Update linux/mmap.h copy
  perf trace beauty: Use the mmap flags table generated from headers
  perf beauty: Wire up the mmap flags table generator to the Makefile
  perf beauty: Add a generator for MAP_ mmap's flag constants
  tools include uapi: Update asound.h copy
  tools arch uapi: Update asm-generic/unistd.h and arm64 unistd.h copies
  tools include uapi: Update linux/fs.h copy
  perf callchain: Honour the ordering of PERF_CONTEXT_{USER,KERNEL,etc}
  perf cs-etm: Correct CPU mode for samples
  perf unwind: Take pgoff into account when reporting elf to libdwfl
  perf top: Do not use overwrite mode by default
  perf top: Allow disabling the overwrite mode
  perf trace: Beautify mount's first pathname arg
  ...
@@ -139,6 +139,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
 #include "arch/x86/annotate/instructions.c"
 #include "arch/powerpc/annotate/instructions.c"
 #include "arch/s390/annotate/instructions.c"
+#include "arch/sparc/annotate/instructions.c"
 
 static struct arch architectures[] = {
 	{
@@ -170,6 +171,13 @@ static struct arch architectures[] = {
 			.comment_char = '#',
 		},
 	},
+	{
+		.name = "sparc",
+		.init = sparc__annotate_init,
+		.objdump = {
+			.comment_char = '#',
+		},
+	},
 };
 
 static void ins__delete(struct ins_operands *ops)

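The two hunks above teach perf annotate about Sparc by adding a "sparc" entry to the architectures[] table. As a rough illustration of how such a name-keyed table is used, here is a standalone sketch with a plain linear lookup; the struct layout and the arch_find() helper are simplified stand-ins, not perf's actual types or lookup code.

/* Minimal standalone sketch: a name-keyed arch table looked up by a
 * linear scan. Illustrative only; perf uses its own types and helper. */
#include <stdio.h>
#include <string.h>

struct arch_entry {
	const char *name;
	char comment_char;
};

static const struct arch_entry architectures[] = {
	{ .name = "powerpc", .comment_char = '#' },
	{ .name = "s390",    .comment_char = '#' },
	{ .name = "sparc",   .comment_char = '#' },	/* new entry in this merge */
	{ .name = "x86",     .comment_char = '#' },
};

static const struct arch_entry *arch_find(const char *name)
{
	size_t i;

	for (i = 0; i < sizeof(architectures) / sizeof(architectures[0]); i++) {
		if (!strcmp(architectures[i].name, name))
			return &architectures[i];
	}
	return NULL;
}

int main(void)
{
	const struct arch_entry *arch = arch_find("sparc");

	printf("sparc found: %s\n", arch ? "yes" : "no");
	return 0;
}
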
@@ -962,16 +962,23 @@ s64 perf_event__process_auxtrace(struct perf_session *session,
 #define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ	64
 #define PERF_ITRACE_MAX_LAST_BRANCH_SZ		1024
 
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+				    bool no_sample)
 {
-	synth_opts->instructions = true;
 	synth_opts->branches = true;
 	synth_opts->transactions = true;
 	synth_opts->ptwrites = true;
 	synth_opts->pwr_events = true;
 	synth_opts->errors = true;
-	synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
-	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	if (no_sample) {
+		synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
+		synth_opts->period = 1;
+		synth_opts->calls = true;
+	} else {
+		synth_opts->instructions = true;
+		synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+		synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	}
 	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
 	synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
 	synth_opts->initial_skip = 0;
@@ -999,7 +1006,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 	}
 
 	if (!str) {
-		itrace_synth_opts__set_default(synth_opts);
+		itrace_synth_opts__set_default(synth_opts, false);
 		return 0;
 	}
 

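The new no_sample argument (driven by the default_no_sample flag added in the header below) disables the periodic 'instructions' samples and enables synthesized call events instead. A standalone sketch of that selection; the types, constants and the 100000 default period are simplified stand-ins, not perf's real definitions.

/* Standalone sketch of the default selection added above. */
#include <stdbool.h>
#include <stdio.h>

struct synth_opts {
	bool instructions;
	bool calls;
	int period_type;	/* 0: default type, 1: per-instruction */
	unsigned long period;
};

static void set_default(struct synth_opts *opts, bool no_sample)
{
	if (no_sample) {
		/* no periodic samples: synthesize call events instead */
		opts->period_type = 1;
		opts->period = 1;
		opts->calls = true;
	} else {
		opts->instructions = true;
		opts->period_type = 0;
		opts->period = 100000;	/* placeholder default period */
	}
}

int main(void)
{
	struct synth_opts sampling = {0}, no_sampling = {0};

	set_default(&sampling, false);
	set_default(&no_sampling, true);
	printf("sampling:  instructions=%d calls=%d\n",
	       sampling.instructions, sampling.calls);
	printf("no_sample: instructions=%d calls=%d\n",
	       no_sampling.instructions, no_sampling.calls);
	return 0;
}
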
@@ -58,6 +58,7 @@ enum itrace_period_type {
 /**
  * struct itrace_synth_opts - AUX area tracing synthesis options.
  * @set: indicates whether or not options have been set
+ * @default_no_sample: Default to no sampling.
  * @inject: indicates the event (not just the sample) must be fully synthesized
  *          because 'perf inject' will write it out
  * @instructions: whether to synthesize 'instructions' events
@@ -82,6 +83,7 @@ enum itrace_period_type {
  */
 struct itrace_synth_opts {
 	bool			set;
+	bool			default_no_sample;
 	bool			inject;
 	bool			instructions;
 	bool			branches;
@@ -528,7 +530,8 @@ int perf_event__process_auxtrace_error(struct perf_session *session,
 				       union perf_event *event);
 int itrace_parse_synth_opts(const struct option *opt, const char *str,
 			    int unset);
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+				    bool no_sample);
 
 size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
 void perf_session__auxtrace_error_inc(struct perf_session *session,

@@ -244,6 +244,27 @@ static void cs_etm__free(struct perf_session *session)
 	zfree(&aux);
 }
 
+static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
+{
+	struct machine *machine;
+
+	machine = etmq->etm->machine;
+
+	if (address >= etmq->etm->kernel_start) {
+		if (machine__is_host(machine))
+			return PERF_RECORD_MISC_KERNEL;
+		else
+			return PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (machine__is_host(machine))
+			return PERF_RECORD_MISC_USER;
+		else if (perf_guest)
+			return PERF_RECORD_MISC_GUEST_USER;
+		else
+			return PERF_RECORD_MISC_HYPERVISOR;
+	}
+}
+
 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
 			      size_t size, u8 *buffer)
 {
@@ -258,10 +279,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
 		return -1;
 
 	machine = etmq->etm->machine;
-	if (address >= etmq->etm->kernel_start)
-		cpumode = PERF_RECORD_MISC_KERNEL;
-	else
-		cpumode = PERF_RECORD_MISC_USER;
+	cpumode = cs_etm__cpu_mode(etmq, address);
 
 	thread = etmq->thread;
 	if (!thread) {
@@ -653,7 +671,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 	struct perf_sample sample = {.ip = 0,};
 
 	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
 	event->sample.header.size = sizeof(struct perf_event_header);
 
 	sample.ip = addr;
@@ -665,7 +683,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 	sample.cpu = etmq->packet->cpu;
 	sample.flags = 0;
 	sample.insn_len = 1;
-	sample.cpumode = event->header.misc;
+	sample.cpumode = event->sample.header.misc;
 
 	if (etm->synth_opts.last_branch) {
 		cs_etm__copy_last_branch_rb(etmq);
@@ -706,12 +724,15 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
 		u64			nr;
 		struct branch_entry	entries;
 	} dummy_bs;
+	u64 ip;
+
+	ip = cs_etm__last_executed_instr(etmq->prev_packet);
 
 	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
 	event->sample.header.size = sizeof(struct perf_event_header);
 
-	sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
+	sample.ip = ip;
 	sample.pid = etmq->pid;
 	sample.tid = etmq->tid;
 	sample.addr = cs_etm__first_executed_instr(etmq->packet);
@@ -720,7 +741,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
 	sample.period = 1;
 	sample.cpu = etmq->packet->cpu;
 	sample.flags = 0;
-	sample.cpumode = PERF_RECORD_MISC_USER;
+	sample.cpumode = event->sample.header.misc;
 
 	/*
 	 * perf report cannot handle events without a branch stack
@@ -1432,7 +1453,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		etm->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&etm->synth_opts);
+		itrace_synth_opts__set_default(&etm->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		etm->synth_opts.callchain = false;
 	}
 

@@ -63,6 +63,7 @@ struct perf_env {
 	struct numa_node	*numa_nodes;
 	struct memory_node	*memory_nodes;
 	unsigned long long	 memory_bsize;
+	u64			clockid_res_ns;
 };
 
 extern struct perf_env perf_env;

@@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
 	event->fork.pid = tgid;
 	event->fork.tid = pid;
 	event->fork.header.type = PERF_RECORD_FORK;
+	event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
 
 	event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
 

@@ -358,7 +358,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
 	struct perf_evsel *pos;
 
 	evlist__for_each_entry(evlist, pos) {
-		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
 			continue;
 		perf_evsel__disable(pos);
 	}

@@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
 	evsel->leader = evsel;
 	evsel->unit = "";
 	evsel->scale = 1.0;
+	evsel->max_events = ULONG_MAX;
 	evsel->evlist = NULL;
 	evsel->bpf_fd = -1;
 	INIT_LIST_HEAD(&evsel->node);
@@ -793,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel,
 		case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
 			max_stack = term->val.max_stack;
 			break;
+		case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
+			evsel->max_events = term->val.max_events;
+			break;
 		case PERF_EVSEL__CONFIG_TERM_INHERIT:
 			/*
 			 * attr->inherit should has already been set by
@@ -1203,16 +1207,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
 
 int perf_evsel__enable(struct perf_evsel *evsel)
 {
-	return perf_evsel__run_ioctl(evsel,
-				     PERF_EVENT_IOC_ENABLE,
-				     0);
+	int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
+
+	if (!err)
+		evsel->disabled = false;
+
+	return err;
 }
 
 int perf_evsel__disable(struct perf_evsel *evsel)
 {
-	return perf_evsel__run_ioctl(evsel,
-				     PERF_EVENT_IOC_DISABLE,
-				     0);
+	int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
+	/*
+	 * We mark it disabled here so that tools that disable a event can
+	 * ignore events after they disable it. I.e. the ring buffer may have
+	 * already a few more events queued up before the kernel got the stop
+	 * request.
+	 */
+	if (!err)
+		evsel->disabled = true;
+
+	return err;
 }
 
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)

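perf_evsel__enable()/perf_evsel__disable() now record the outcome in evsel->disabled, which is what lets the perf_evlist__disable() hunk above skip events that are already off. A standalone model of that idempotent disable path; the ioctl is replaced by a stub and the types are simplified, so this is a sketch rather than the real perf code.

/* Standalone model of the disabled-state tracking added above.
 * The real code issues PERF_EVENT_IOC_{ENABLE,DISABLE} ioctls; here the
 * "ioctl" is a stub that always succeeds. */
#include <stdbool.h>
#include <stdio.h>

struct evsel {
	const char *name;
	bool disabled;
};

static int fake_ioctl_disable(struct evsel *evsel)
{
	(void)evsel;
	return 0;	/* pretend the kernel accepted the request */
}

static int evsel_disable(struct evsel *evsel)
{
	int err = fake_ioctl_disable(evsel);

	/* Mark it disabled so events still queued in the ring buffer can be
	 * ignored by tools that asked for the stop. */
	if (!err)
		evsel->disabled = true;
	return err;
}

static void evlist_disable(struct evsel *evsels, int nr)
{
	for (int i = 0; i < nr; i++) {
		if (evsels[i].disabled)	/* new check in this merge */
			continue;
		evsel_disable(&evsels[i]);
		printf("disabled %s\n", evsels[i].name);
	}
}

int main(void)
{
	struct evsel evsels[] = { { "cycles", false }, { "instructions", true } };

	evlist_disable(evsels, 2);
	return 0;
}
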
@@ -46,6 +46,7 @@ enum term_type {
 	PERF_EVSEL__CONFIG_TERM_STACK_USER,
 	PERF_EVSEL__CONFIG_TERM_INHERIT,
 	PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+	PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
 	PERF_EVSEL__CONFIG_TERM_OVERWRITE,
 	PERF_EVSEL__CONFIG_TERM_DRV_CFG,
 	PERF_EVSEL__CONFIG_TERM_BRANCH,
@@ -65,6 +66,7 @@ struct perf_evsel_config_term {
 		bool	inherit;
 		bool	overwrite;
 		char	*branch;
+		unsigned long max_events;
 	} val;
 	bool weak;
 };
@@ -99,6 +101,8 @@ struct perf_evsel {
 	struct perf_counts	*prev_raw_counts;
 	int			idx;
 	u32			ids;
+	unsigned long		max_events;
+	unsigned long		nr_events_printed;
 	char			*name;
 	double			scale;
 	const char		*unit;
@@ -119,6 +123,7 @@ struct perf_evsel {
 	bool			snapshot;
 	bool			supported;
 	bool			needs_swap;
+	bool			disabled;
 	bool			no_aux_samples;
 	bool			immediate;
 	bool			system_wide;

@@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
 #elif defined(__powerpc__)
 #define GEN_ELF_ARCH	EM_PPC
 #define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__sparc__) && defined(__arch64__)
+#define GEN_ELF_ARCH	EM_SPARCV9
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__sparc__)
+#define GEN_ELF_ARCH	EM_SPARC
+#define GEN_ELF_CLASS	ELFCLASS32
 #else
 #error "unsupported architecture"
 #endif

@@ -1034,6 +1034,13 @@ static int write_auxtrace(struct feat_fd *ff,
 	return err;
 }
 
+static int write_clockid(struct feat_fd *ff,
+			 struct perf_evlist *evlist __maybe_unused)
+{
+	return do_write(ff, &ff->ph->env.clockid_res_ns,
+			sizeof(ff->ph->env.clockid_res_ns));
+}
+
 static int cpu_cache_level__sort(const void *a, const void *b)
 {
 	struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@@ -1508,6 +1515,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
 		fprintf(fp, "# Core ID and Socket ID information is not available\n");
 }
 
+static void print_clockid(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
+		ff->ph->env.clockid_res_ns * 1000);
+}
+
 static void free_event_desc(struct perf_evsel *events)
 {
 	struct perf_evsel *evsel;
@@ -2531,6 +2544,15 @@ out:
 	return ret;
 }
 
+static int process_clockid(struct feat_fd *ff,
+			   void *data __maybe_unused)
+{
+	if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
+		return -1;
+
+	return 0;
+}
+
 struct feature_ops {
 	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
 	void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2590,6 +2612,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPN(CACHE, cache, true),
 	FEAT_OPR(SAMPLE_TIME, sample_time, false),
 	FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
+	FEAT_OPR(CLOCKID, clockid, false)
 };
 
 struct header_print_data {

@@ -38,6 +38,7 @@ enum {
 	HEADER_CACHE,
 	HEADER_SAMPLE_TIME,
 	HEADER_MEM_TOPOLOGY,
+	HEADER_CLOCKID,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS = 256,
 };

@@ -269,6 +269,13 @@ static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
 	return 0;
 }
 
+static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip)
+{
+	return machine__kernel_ip(bts->machine, ip) ?
+	       PERF_RECORD_MISC_KERNEL :
+	       PERF_RECORD_MISC_USER;
+}
+
 static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 					 struct branch *branch)
 {
@@ -281,12 +288,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 	    bts->num_events++ <= bts->synth_opts.initial_skip)
 		return 0;
 
-	event.sample.header.type = PERF_RECORD_SAMPLE;
-	event.sample.header.misc = PERF_RECORD_MISC_USER;
-	event.sample.header.size = sizeof(struct perf_event_header);
-
-	sample.cpumode = PERF_RECORD_MISC_USER;
 	sample.ip = le64_to_cpu(branch->from);
+	sample.cpumode = intel_bts_cpumode(bts, sample.ip);
 	sample.pid = btsq->pid;
 	sample.tid = btsq->tid;
 	sample.addr = le64_to_cpu(branch->to);
@@ -298,6 +301,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 	sample.insn_len = btsq->intel_pt_insn.length;
 	memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
 
+	event.sample.header.type = PERF_RECORD_SAMPLE;
+	event.sample.header.misc = sample.cpumode;
+	event.sample.header.size = sizeof(struct perf_event_header);
+
 	if (bts->synth_opts.inject) {
 		event.sample.header.size = bts->branches_event_size;
 		ret = perf_event__synthesize_sample(&event,
@@ -910,7 +917,8 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		bts->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&bts->synth_opts);
+		itrace_synth_opts__set_default(&bts->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		if (session->itrace_synth_opts)
 			bts->synth_opts.thread_stack =
 				session->itrace_synth_opts->thread_stack;

@@ -407,6 +407,13 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
 	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
 }
 
+static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
+{
+	return ip >= pt->kernel_start ?
+	       PERF_RECORD_MISC_KERNEL :
+	       PERF_RECORD_MISC_USER;
+}
+
 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 				   uint64_t *insn_cnt_ptr, uint64_t *ip,
 				   uint64_t to_ip, uint64_t max_insn_cnt,
@@ -429,10 +436,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 	if (to_ip && *ip == to_ip)
 		goto out_no_cache;
 
-	if (*ip >= ptq->pt->kernel_start)
-		cpumode = PERF_RECORD_MISC_KERNEL;
-	else
-		cpumode = PERF_RECORD_MISC_USER;
+	cpumode = intel_pt_cpumode(ptq->pt, *ip);
 
 	thread = ptq->thread;
 	if (!thread) {
@@ -759,7 +763,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 	if (pt->synth_opts.callchain) {
 		size_t sz = sizeof(struct ip_callchain);
 
-		sz += pt->synth_opts.callchain_sz * sizeof(u64);
+		/* Add 1 to callchain_sz for callchain context */
+		sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
 		ptq->chain = zalloc(sz);
 		if (!ptq->chain)
 			goto out_free;
@@ -1058,15 +1063,11 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
 				   union perf_event *event,
 				   struct perf_sample *sample)
 {
-	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
-	event->sample.header.size = sizeof(struct perf_event_header);
-
 	if (!pt->timeless_decoding)
 		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
 
-	sample->cpumode = PERF_RECORD_MISC_USER;
 	sample->ip = ptq->state->from_ip;
+	sample->cpumode = intel_pt_cpumode(pt, sample->ip);
 	sample->pid = ptq->pid;
 	sample->tid = ptq->tid;
 	sample->addr = ptq->state->to_ip;
@@ -1075,6 +1076,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
 	sample->flags = ptq->flags;
 	sample->insn_len = ptq->insn_len;
 	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = sample->cpumode;
+	event->sample.header.size = sizeof(struct perf_event_header);
 }
 
 static int intel_pt_inject_event(union perf_event *event,
@@ -1160,7 +1165,8 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
 
 	if (pt->synth_opts.callchain) {
 		thread_stack__sample(ptq->thread, ptq->chain,
-				     pt->synth_opts.callchain_sz, sample->ip);
+				     pt->synth_opts.callchain_sz + 1,
+				     sample->ip, pt->kernel_start);
 		sample->callchain = ptq->chain;
 	}
 
@@ -2559,7 +2565,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		pt->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&pt->synth_opts);
+		itrace_synth_opts__set_default(&pt->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		if (use_browser != -1) {
 			pt->synth_opts.branches = false;
 			pt->synth_opts.callchain = true;

@@ -1708,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 	struct thread *parent = machine__findnew_thread(machine,
 							event->fork.ppid,
 							event->fork.ptid);
+	bool do_maps_clone = true;
 	int err = 0;
 
 	if (dump_trace)
@@ -1736,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 
 	thread = machine__findnew_thread(machine, event->fork.pid,
 					 event->fork.tid);
+	/*
+	 * When synthesizing FORK events, we are trying to create thread
+	 * objects for the already running tasks on the machine.
+	 *
+	 * Normally, for a kernel FORK event, we want to clone the parent's
+	 * maps because that is what the kernel just did.
+	 *
+	 * But when synthesizing, this should not be done.  If we do, we end up
+	 * with overlapping maps as we process the sythesized MMAP2 events that
+	 * get delivered shortly thereafter.
+	 *
+	 * Use the FORK event misc flags in an internal way to signal this
+	 * situation, so we can elide the map clone when appropriate.
+	 */
+	if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC)
+		do_maps_clone = false;
 
 	if (thread == NULL || parent == NULL ||
-	    thread__fork(thread, parent, sample->time) < 0) {
+	    thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
 		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
 		err = -1;
 	}
@@ -2140,6 +2157,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 	return 0;
 }
 
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+			     struct callchain_cursor *cursor,
+			     struct symbol **parent,
+			     struct addr_location *root_al,
+			     u8 *cpumode, int ent)
+{
+	int err = 0;
+
+	while (--ent >= 0) {
+		u64 ip = chain->ips[ent];
+
+		if (ip >= PERF_CONTEXT_MAX) {
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al, cpumode, ip,
+					       false, NULL, NULL, 0);
+			break;
+		}
+	}
+	return err;
+}
+
 static int thread__resolve_callchain_sample(struct thread *thread,
 					    struct callchain_cursor *cursor,
 					    struct perf_evsel *evsel,
@@ -2246,6 +2284,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	}
 
 check_calls:
+	if (callchain_param.order != ORDER_CALLEE) {
+		err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+					&cpumode, chain->nr - first_call);
+		if (err)
+			return (err < 0) ? err : 0;
+	}
 	for (i = first_call, nr_entries = 0;
 	     i < chain_nr && nr_entries < max_stack; i++) {
 		u64 ip;
@@ -2260,9 +2304,15 @@ check_calls:
 			continue;
 #endif
 		ip = chain->ips[j];
-
 		if (ip < PERF_CONTEXT_MAX)
 			++nr_entries;
+		else if (callchain_param.order != ORDER_CALLEE) {
+			err = find_prev_cpumode(chain, thread, cursor, parent,
+						root_al, &cpumode, j);
+			if (err)
+				return (err < 0) ? err : 0;
+			continue;
+		}
 
 		err = add_callchain_ip(thread, cursor, parent,
 				       root_al, &cpumode, ip,

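A sampled callchain stores PERF_CONTEXT_* markers ahead of the addresses they cover, so when entries are visited in caller order the marker that applies to an entry has to be found by scanning back toward the start of the array, which is what find_prev_cpumode() does. A standalone sketch of that scan; the sentinel values and names here are stand-ins for the real PERF_CONTEXT_* constants, not perf's definitions.

/* Standalone sketch: scan a callchain array for the context marker that
 * covers a given entry, in the spirit of find_prev_cpumode(). */
#include <stdio.h>
#include <stdint.h>

#define CONTEXT_MAX	0xff00000000000000ULL	/* stand-in for PERF_CONTEXT_MAX */
#define CONTEXT_KERNEL	(CONTEXT_MAX + 1)
#define CONTEXT_USER	(CONTEXT_MAX + 2)

static uint64_t find_prev_context(const uint64_t *ips, int ent)
{
	/* Walk toward the start of the array until a context marker is hit. */
	while (--ent >= 0) {
		if (ips[ent] >= CONTEXT_MAX)
			return ips[ent];
	}
	return 0;
}

int main(void)
{
	/* Layout as stored in a sample: marker first, then its addresses. */
	uint64_t ips[] = { CONTEXT_KERNEL, 0xffffffff81000010ULL,
			   CONTEXT_USER, 0x400100, 0x400200 };
	uint64_t ctx = find_prev_context(ips, 4);	/* entry 4 is 0x400200 */

	printf("context covering last entry: %s\n",
	       ctx == CONTEXT_USER ? "user" : "kernel");
	return 0;
}
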
@@ -926,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
 	[PARSE_EVENTS__TERM_TYPE_NOINHERIT]		= "no-inherit",
 	[PARSE_EVENTS__TERM_TYPE_INHERIT]		= "inherit",
 	[PARSE_EVENTS__TERM_TYPE_MAX_STACK]		= "max-stack",
+	[PARSE_EVENTS__TERM_TYPE_MAX_EVENTS]		= "nr",
 	[PARSE_EVENTS__TERM_TYPE_OVERWRITE]		= "overwrite",
 	[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE]		= "no-overwrite",
 	[PARSE_EVENTS__TERM_TYPE_DRV_CFG]		= "driver-config",
@@ -1037,6 +1038,9 @@ do { \
 	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
 		CHECK_TYPE_VAL(NUM);
 		break;
+	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+		CHECK_TYPE_VAL(NUM);
+		break;
 	default:
 		err->str = strdup("unknown term");
 		err->idx = term->err_term;
@@ -1084,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
 	case PARSE_EVENTS__TERM_TYPE_INHERIT:
 	case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
 	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
 	case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
 	case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
 		return config_term_common(attr, term, err);
@@ -1162,6 +1167,9 @@ do { \
 	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
 		ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
 		break;
+	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+		ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num);
+		break;
 	case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
 		ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
 		break;

@@ -71,6 +71,7 @@ enum {
 	PARSE_EVENTS__TERM_TYPE_NOINHERIT,
 	PARSE_EVENTS__TERM_TYPE_INHERIT,
 	PARSE_EVENTS__TERM_TYPE_MAX_STACK,
+	PARSE_EVENTS__TERM_TYPE_MAX_EVENTS,
 	PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
 	PARSE_EVENTS__TERM_TYPE_OVERWRITE,
 	PARSE_EVENTS__TERM_TYPE_DRV_CFG,

@@ -269,6 +269,7 @@ time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
 call-graph	{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
 stack-size	{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
 max-stack	{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
+nr		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); }
 inherit		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
 no-inherit	{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
 overwrite	{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }

@@ -324,7 +324,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
 		plt_entry_size = 16;
 		break;
 
-	default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */
+	case EM_SPARC:
+		plt_header_size = 48;
+		plt_entry_size = 12;
+		break;
+
+	case EM_SPARCV9:
+		plt_header_size = 128;
+		plt_entry_size = 32;
+		break;
+
+	default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */
 		plt_header_size = shdr_plt.sh_entsize;
 		plt_entry_size = shdr_plt.sh_entsize;
 		break;

@@ -123,7 +123,8 @@ struct symbol_conf {
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,
-			*field_sep;
+			*field_sep,
+			*graph_function;
 	const char	*default_guest_vmlinux_name,
 			*default_guest_kallsyms,
 			*default_guest_modules;

@@ -310,20 +310,46 @@ void thread_stack__free(struct thread *thread)
 	}
 }
 
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
-			  size_t sz, u64 ip)
+static inline u64 callchain_context(u64 ip, u64 kernel_start)
 {
-	size_t i;
+	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
+}
 
-	if (!thread || !thread->ts)
-		chain->nr = 1;
-	else
-		chain->nr = min(sz, thread->ts->cnt + 1);
+void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+			  size_t sz, u64 ip, u64 kernel_start)
+{
+	u64 context = callchain_context(ip, kernel_start);
+	u64 last_context;
+	size_t i, j;
 
-	chain->ips[0] = ip;
+	if (sz < 2) {
+		chain->nr = 0;
+		return;
+	}
 
-	for (i = 1; i < chain->nr; i++)
-		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
+	chain->ips[0] = context;
+	chain->ips[1] = ip;
+
+	if (!thread || !thread->ts) {
+		chain->nr = 2;
+		return;
+	}
+
+	last_context = context;
+
+	for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
+		ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
+		context = callchain_context(ip, kernel_start);
+		if (context != last_context) {
+			if (i >= sz - 1)
+				break;
+			chain->ips[i++] = context;
+			last_context = context;
+		}
+		chain->ips[i] = ip;
+	}
+
+	chain->nr = i;
 }
 
 struct call_return_processor *

|
||||
u64 to_ip, u16 insn_len, u64 trace_nr);
|
||||
void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
|
||||
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
|
||||
size_t sz, u64 ip);
|
||||
size_t sz, u64 ip, u64 kernel_start);
|
||||
int thread_stack__flush(struct thread *thread);
|
||||
void thread_stack__free(struct thread *thread);
|
||||
size_t thread_stack__depth(struct thread *thread);
|
||||
|
@@ -330,7 +330,8 @@ static int thread__prepare_access(struct thread *thread)
|
||||
}
|
||||
|
||||
static int thread__clone_map_groups(struct thread *thread,
|
||||
struct thread *parent)
|
||||
struct thread *parent,
|
||||
bool do_maps_clone)
|
||||
{
|
||||
/* This is new thread, we share map groups for process. */
|
||||
if (thread->pid_ == parent->pid_)
|
||||
@@ -341,15 +342,11 @@ static int thread__clone_map_groups(struct thread *thread,
|
||||
thread->pid_, thread->tid, parent->pid_, parent->tid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* But this one is new process, copy maps. */
|
||||
if (map_groups__clone(thread, parent->mg) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0;
|
||||
}
|
||||
|
||||
int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
|
||||
int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
|
||||
{
|
||||
if (parent->comm_set) {
|
||||
const char *comm = thread__comm_str(parent);
|
||||
@@ -362,7 +359,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
|
||||
}
|
||||
|
||||
thread->ppid = parent->tid;
|
||||
return thread__clone_map_groups(thread, parent);
|
||||
return thread__clone_map_groups(thread, parent, do_maps_clone);
|
||||
}
|
||||
|
||||
void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
|
||||
|
@@ -42,6 +42,8 @@ struct thread {
 	void				*addr_space;
 	struct unwind_libunwind_ops	*unwind_libunwind_ops;
 #endif
+	bool				filter;
+	int				filter_entry_depth;
 };
 
 struct machine;
@@ -87,7 +89,7 @@ struct comm *thread__comm(const struct thread *thread);
 struct comm *thread__exec_comm(const struct thread *thread);
 const char *thread__comm_str(const struct thread *thread);
 int thread__insert_map(struct thread *thread, struct map *map);
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
 size_t thread__fprintf(struct thread *thread, FILE *fp);
 
 struct thread *thread__main_thread(struct machine *machine, struct thread *thread);

@@ -45,13 +45,13 @@ static int __report_module(struct addr_location *al, u64 ip,
 		Dwarf_Addr s;
 
 		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
-		if (s != al->map->start)
+		if (s != al->map->start - al->map->pgoff)
 			mod = 0;
 	}
 
 	if (!mod)
 		mod = dwfl_report_elf(ui->dwfl, dso->short_name,
-				      (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start,
+				      (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff,
 				      false);
 
 	return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
