perf tools: Optimize sample parsing for ordered events
Currently when using ordered events we parse the sample twice (the
perf_evlist__parse_sample function). Once before we queue the sample for
sorting:

  perf_session__process_event
    perf_evlist__parse_sample(sample)
    perf_session__queue_event(sample.time)

And then when we deliver the sorted sample:

  ordered_events__deliver_event
    perf_evlist__parse_sample
    perf_session__deliver_event

We can skip the initial full sample parsing by using
perf_evlist__parse_sample_timestamp function, which got introduced
earlier. The new path looks like:

  perf_session__process_event
    perf_evlist__parse_sample_timestamp
    perf_session__queue_event

  ordered_events__deliver_event
    perf_session__deliver_event
      perf_evlist__parse_sample

It saves some instructions and is slightly faster:

Before:
  Performance counter stats for './perf.old report --stdio' (5 runs):

     64,396,007,225  cycles:u                                   ( +- 0.97% )
    105,882,112,735  instructions:u  # 1.64 insn per cycle      ( +- 0.00% )

       21.618103465 seconds time elapsed                        ( +- 1.12% )

After:
  Performance counter stats for './perf report --stdio' (5 runs):

     60,567,807,182  cycles:u                                   ( +- 0.40% )
    104,853,333,514  instructions:u  # 1.73 insn per cycle      ( +- 0.00% )

       20.168895243 seconds time elapsed                        ( +- 0.32% )

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:

committed by
Arnaldo Carvalho de Melo

parent
dc83e13940
commit
93d10af26b
@@ -741,20 +741,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
|
||||
u64 *mmap_time)
|
||||
{
|
||||
union perf_event *event;
|
||||
struct perf_sample sample;
|
||||
u64 timestamp;
|
||||
s64 n = 0;
|
||||
int err;
|
||||
|
||||
*mmap_time = ULLONG_MAX;
|
||||
while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
|
||||
err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
|
||||
err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, &timestamp);
|
||||
if (err) {
|
||||
perf_evlist__mmap_consume(kvm->evlist, idx);
|
||||
pr_err("Failed to parse sample\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = perf_session__queue_event(kvm->session, event, sample.time, 0);
|
||||
err = perf_session__queue_event(kvm->session, event, timestamp, 0);
|
||||
/*
|
||||
* FIXME: Here we can't consume the event, as perf_session__queue_event will
|
||||
* point to it, and it'll get possibly overwritten by the kernel.
|
||||
@@ -768,7 +768,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
|
||||
|
||||
/* save time stamp of our first sample for this mmap */
|
||||
if (n == 0)
|
||||
*mmap_time = sample.time;
|
||||
*mmap_time = timestamp;
|
||||
|
||||
/* limit events per mmap handled all at once */
|
||||
n++;
|
||||
|
Reference in New Issue
Block a user