perf tools: Optimize sample parsing for ordered events

Currently when using ordered events we parse the sample twice (the
perf_evlist__parse_sample function). Once before we queue the sample for
sorting:

  perf_session__process_event
    perf_evlist__parse_sample(sample)
    perf_session__queue_event(sample.time)

And then when we deliver the sorted sample:

  ordered_events__deliver_event
    perf_evlist__parse_sample
    perf_session__deliver_event

We can skip the initial full sample parsing by using
perf_evlist__parse_sample_timestamp function, which got introduced
earlier. The new path looks like:

  perf_session__process_event
    perf_evlist__parse_sample_timestamp
    perf_session__queue_event

  ordered_events__deliver_event
    perf_session__deliver_event
      perf_evlist__parse_sample

It saves some instructions and is slightly faster:

Before:
 Performance counter stats for './perf.old report --stdio' (5 runs):

    64,396,007,225      cycles:u                                                      ( +-  0.97% )
   105,882,112,735      instructions:u            #    1.64  insn per cycle           ( +-  0.00% )

      21.618103465 seconds time elapsed                                          ( +-  1.12% )

After:
 Performance counter stats for './perf report --stdio' (5 runs):

    60,567,807,182      cycles:u                                                      ( +-  0.40% )
   104,853,333,514      instructions:u            #    1.73  insn per cycle           ( +-  0.00% )

      20.168895243 seconds time elapsed                                          ( +-  0.32% )

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Jiri Olsa
2017-08-03 13:21:14 +02:00
committed by Arnaldo Carvalho de Melo
parent dc83e13940
commit 93d10af26b
2 changed files with 22 additions and 27 deletions

View File

@@ -741,20 +741,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
u64 *mmap_time)
{
union perf_event *event;
struct perf_sample sample;
u64 timestamp;
s64 n = 0;
int err;
*mmap_time = ULLONG_MAX;
while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, &timestamp);
if (err) {
perf_evlist__mmap_consume(kvm->evlist, idx);
pr_err("Failed to parse sample\n");
return -1;
}
err = perf_session__queue_event(kvm->session, event, sample.time, 0);
err = perf_session__queue_event(kvm->session, event, timestamp, 0);
/*
* FIXME: Here we can't consume the event, as perf_session__queue_event will
* point to it, and it'll get possibly overwritten by the kernel.
@@ -768,7 +768,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
/* save time stamp of our first sample for this mmap */
if (n == 0)
*mmap_time = sample.time;
*mmap_time = timestamp;
/* limit events per mmap handled all at once */
n++;