Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Kernel side changes: - Clean up the x86 instruction decoder (Masami Hiramatsu) - Add new uprobes optimization for PUSH instructions on x86 (Yonghong Song) - Add MSR_IA32_THERM_STATUS to the MSR events (Stephane Eranian) - Fix misc bugs, update documentation, plus various cleanups (Jiri Olsa) There's a large number of tooling side improvements: - Intel-PT/BTS improvements (Adrian Hunter) - Numerous 'perf trace' improvements (Arnaldo Carvalho de Melo) - Introduce an errno code to string facility (Hendrik Brueckner) - Various build system improvements (Jiri Olsa) - Add support for CoreSight trace decoding by making the perf tools use the external openCSD (Mathieu Poirier, Tor Jeremiassen) - Add ARM Statistical Profiling Extensions (SPE) support (Kim Phillips) - libtraceevent updates (Steven Rostedt) - Intel vendor event JSON updates (Andi Kleen) - Introduce 'perf report --mmaps' and 'perf report --tasks' to show info present in 'perf.data' (Jiri Olsa, Arnaldo Carvalho de Melo) - Add infrastructure to record first and last sample time to the perf.data file header, so that when processing all samples in a 'perf record' session, such as when doing build-id processing, or when specifically requesting that that info be recorded, use that in 'perf report --time', that also got support for percent slices in addition to absolute ones. I.e. 
now it is possible to ask for the samples in the 10%-20% time slice of a perf.data file (Jin Yao) - Allow system wide 'perf stat --per-thread', sorting the result (Jin Yao) E.g.: [root@jouet ~]# perf stat --per-thread --metrics IPC ^C Performance counter stats for 'system wide': make-22229 23,012,094,032 inst_retired.any # 0.8 IPC cc1-22419 692,027,497 inst_retired.any # 0.8 IPC gcc-22418 328,231,855 inst_retired.any # 0.9 IPC cc1-22509 220,853,647 inst_retired.any # 0.8 IPC gcc-22486 199,874,810 inst_retired.any # 1.0 IPC as-22466 177,896,365 inst_retired.any # 0.9 IPC cc1-22465 150,732,374 inst_retired.any # 0.8 IPC gcc-22508 112,555,593 inst_retired.any # 0.9 IPC cc1-22487 108,964,079 inst_retired.any # 0.7 IPC qemu-system-x86-2697 21,330,550 inst_retired.any # 0.3 IPC systemd-journal-551 20,642,951 inst_retired.any # 0.4 IPC docker-containe-17651 9,552,892 inst_retired.any # 0.5 IPC dockerd-current-9809 7,528,586 inst_retired.any # 0.5 IPC make-22153 12,504,194,380 inst_retired.any # 0.8 IPC python2-22429 12,081,290,954 inst_retired.any # 0.8 IPC <SNIP> python2-22429 15,026,328,103 cpu_clk_unhalted.thread cc1-22419 826,660,193 cpu_clk_unhalted.thread gcc-22418 365,321,295 cpu_clk_unhalted.thread cc1-22509 279,169,362 cpu_clk_unhalted.thread gcc-22486 210,156,950 cpu_clk_unhalted.thread <SNIP> 5.638075538 seconds time elapsed [root@jouet ~]# - Improve shell auto-completion of perf events (Jin Yao) - 'perf probe' improvements (Masami Hiramatsu) - Improve PMU infrastructure to support arm64's ThunderX2 implementation defined core events (Ganapatrao Kulkarni) - Various annotation related improvements and fixes (Thomas Richter) - Clarify usage of 'overwrite' and 'backward' in the evlist/mmap code, removing the 'overwrite' parameter from several functions as it was always used as 'false' (Wang Nan) - Fix/improve 'perf record' reverse recording support (Wang Nan) - Improve command line options documentation (Sihyeon Jang) - Optimize sample parsing for ordering 
events, where we don't need to parse all the PERF_SAMPLE_ bits, just the ones leading to the timestamp needed to reorder events (Jiri Olsa) - Generalize the annotation code to support other source information besides objdump/DWARF obtained ones, starting with python scripts, that is slated to be merged soon (Jiri Olsa) - ... and a lot more that I failed to list, see the shortlog and changelog for details" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (262 commits) perf trace beauty flock: Move to separate object file perf evlist: Remove fcntl.h from evlist.h perf trace beauty futex: Beautify FUTEX_BITSET_MATCH_ANY perf trace: Do not print from time delta for interrupted syscall lines perf trace: Add --print-sample perf bpf: Remove misplaced __maybe_unused attribute MAINTAINERS: Adding entry for CoreSight trace decoding perf tools: Add mechanic to synthesise CoreSight trace packets perf tools: Add full support for CoreSight trace decoding pert tools: Add queue management functionality perf tools: Add functionality to communicate with the openCSD decoder perf tools: Add support for decoding CoreSight trace data perf tools: Add decoder mechanic to support dumping trace data perf tools: Add processing of coresight metadata perf tools: Add initial entry point for decoder CoreSight traces perf tools: Integrating the CoreSight decoding library perf vendor events intel: Update IvyTown files to V20 perf vendor events intel: Update IvyBridge files to V20 perf vendor events intel: Update BroadwellDE events to V7 perf vendor events intel: Update SkylakeX events to V1.06 ...
This commit is contained in:
@@ -5824,19 +5824,11 @@ void perf_output_sample(struct perf_output_handle *handle,
|
||||
perf_output_read(handle, event);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
|
||||
if (data->callchain) {
|
||||
int size = 1;
|
||||
int size = 1;
|
||||
|
||||
if (data->callchain)
|
||||
size += data->callchain->nr;
|
||||
|
||||
size *= sizeof(u64);
|
||||
|
||||
__output_copy(handle, data->callchain, size);
|
||||
} else {
|
||||
u64 nr = 0;
|
||||
perf_output_put(handle, nr);
|
||||
}
|
||||
size += data->callchain->nr;
|
||||
size *= sizeof(u64);
|
||||
__output_copy(handle, data->callchain, size);
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_RAW) {
|
||||
@@ -5989,6 +5981,26 @@ static u64 perf_virt_to_phys(u64 virt)
|
||||
return phys_addr;
|
||||
}
|
||||
|
||||
/*
 * Shared sentinel callchain entry with .nr = 0. perf_callchain() returns its
 * address instead of NULL when there is no callchain to report.
 */
static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
|
||||
|
||||
/*
 * perf_callchain - obtain the callchain to attach to a sample for @event.
 * @event: event being sampled; its attr.exclude_callchain_kernel,
 *         attr.exclude_callchain_user and attr.sample_max_stack fields
 *         select what is collected and the stack depth limit passed on.
 * @regs:  register state handed through to get_perf_callchain().
 *
 * Returns the entry produced by get_perf_callchain(), or the address of
 * __empty_callchain when both kernel and user chains are excluded or when
 * get_perf_callchain() returns NULL. The result is therefore never NULL.
 */
static struct perf_callchain_entry *
|
||||
perf_callchain(struct perf_event *event, struct pt_regs *regs)
|
||||
{
|
||||
bool kernel = !event->attr.exclude_callchain_kernel;
|
||||
bool user = !event->attr.exclude_callchain_user;
|
||||
/* Disallow cross-task user callchains. */
|
||||
bool crosstask = event->ctx->task && event->ctx->task != current;
|
||||
const u32 max_stack = event->attr.sample_max_stack;
|
||||
struct perf_callchain_entry *callchain;
|
||||
|
||||
/* Nothing was requested: skip the unwinder and hand back the empty entry. */
if (!kernel && !user)
|
||||
return &__empty_callchain;
|
||||
|
||||
/*
 * NOTE(review): the literal 0 and true are positional arguments whose
 * meaning is defined by get_perf_callchain(); confirm against its prototype.
 */
callchain = get_perf_callchain(regs, 0, kernel, user,
|
||||
max_stack, crosstask, true);
|
||||
/* Map a NULL result onto the empty sentinel so the return is never NULL. */
return callchain ?: &__empty_callchain;
|
||||
}
|
||||
|
||||
void perf_prepare_sample(struct perf_event_header *header,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_event *event,
|
||||
@@ -6011,9 +6023,7 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
int size = 1;
|
||||
|
||||
data->callchain = perf_callchain(event, regs);
|
||||
|
||||
if (data->callchain)
|
||||
size += data->callchain->nr;
|
||||
size += data->callchain->nr;
|
||||
|
||||
header->size += size * sizeof(u64);
|
||||
}
|
||||
@@ -10740,6 +10750,19 @@ inherit_event(struct perf_event *parent_event,
|
||||
if (IS_ERR(child_event))
|
||||
return child_event;
|
||||
|
||||
|
||||
if ((child_event->attach_state & PERF_ATTACH_TASK_DATA) &&
|
||||
!child_ctx->task_ctx_data) {
|
||||
struct pmu *pmu = child_event->pmu;
|
||||
|
||||
child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size,
|
||||
GFP_KERNEL);
|
||||
if (!child_ctx->task_ctx_data) {
|
||||
free_event(child_event);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* is_orphaned_event() and list_add_tail(&parent_event->child_list)
|
||||
* must be under the same lock in order to serialize against
|
||||
@@ -10750,6 +10773,7 @@ inherit_event(struct perf_event *parent_event,
|
||||
if (is_orphaned_event(parent_event) ||
|
||||
!atomic_long_inc_not_zero(&parent_event->refcount)) {
|
||||
mutex_unlock(&parent_event->child_mutex);
|
||||
/* task_ctx_data is freed with child_ctx */
|
||||
free_event(child_event);
|
||||
return NULL;
|
||||
}
|
||||
|
Reference in New Issue
Block a user