perf stat: Support per-die aggregation

It is useful to aggregate counts per die. E.g. Uncore becomes die-scope
on Xeon Cascade Lake-AP.

Introduce a new option "--per-die" to support per-die aggregation.

The global id for each core has been changed to socket + die id + core
id. The global id for each die is socket + die id.

Add die information for per-core aggregation. The output of per-core
aggregation will be changed from "S0-C0" to "S0-D0-C0". Any scripts
which rely on the output format of per-core aggregation probably be
broken.

For 'perf stat record/report', there is no die information when
processing the old perf.data. The per-die result will be the same as
per-socket.

Committer notes:

Renamed 'die' variable to 'die_id' to fix the build in some systems:

    CC       /tmp/build/perf/builtin-script.o
  cc1: warnings being treated as errors
  builtin-stat.c: In function 'perf_env__get_die':
  builtin-stat.c:963: error: declaration of 'die' shadows a global declaration
  util/util.h:19: error: shadowed declaration is here
  mv: cannot stat `/tmp/build/perf/.builtin-stat.o.tmp': No such file or directory

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/n/tip-bsnhx7vgsuu6ei307mw60mbj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Kan Liang
2019-06-04 15:50:42 -07:00
committed by Arnaldo Carvalho de Melo
parent acae8b36cd
commit db5742b684
8 changed files with 177 additions and 18 deletions

View File

@@ -777,6 +777,8 @@ static struct option stat_options[] = {
"stop workload and print counts after a timeout period in ms (>= 10ms)"),
OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
"aggregate counts per processor die", AGGR_DIE),
OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
@@ -801,6 +803,12 @@ static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
return cpu_map__get_socket(map, cpu, NULL);
}
static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
struct cpu_map *map, int cpu)
{
return cpu_map__get_die(map, cpu, NULL);
}
static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
struct cpu_map *map, int cpu)
{
@@ -841,6 +849,12 @@ static int perf_stat__get_socket_cached(struct perf_stat_config *config,
return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
}
static int perf_stat__get_die_cached(struct perf_stat_config *config,
struct cpu_map *map, int idx)
{
return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
}
static int perf_stat__get_core_cached(struct perf_stat_config *config,
struct cpu_map *map, int idx)
{
@@ -871,6 +885,13 @@ static int perf_stat_init_aggr_mode(void)
}
stat_config.aggr_get_id = perf_stat__get_socket_cached;
break;
case AGGR_DIE:
if (cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) {
perror("cannot build die map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_die_cached;
break;
case AGGR_CORE:
if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
@@ -936,21 +957,55 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
}
static int perf_env__get_die(struct cpu_map *map, int idx, void *data)
{
struct perf_env *env = data;
int die_id = -1, cpu = perf_env__get_cpu(env, map, idx);
if (cpu != -1) {
/*
* Encode socket in bit range 15:8
* die_id is relative to socket,
* we need a global id. So we combine
* socket + die id
*/
if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
return -1;
if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
return -1;
die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
}
return die_id;
}
static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
{
struct perf_env *env = data;
int core = -1, cpu = perf_env__get_cpu(env, map, idx);
if (cpu != -1) {
int socket_id = env->cpu[cpu].socket_id;
/*
* Encode socket in upper 16 bits
* core_id is relative to socket, and
* Encode socket in bit range 31:24
* encode die id in bit range 23:16
* core_id is relative to socket and die,
* we need a global id. So we combine
* socket + core id.
* socket + die id + core id
*/
core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
return -1;
if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
return -1;
if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
return -1;
core = (env->cpu[cpu].socket_id << 24) |
(env->cpu[cpu].die_id << 16) |
(env->cpu[cpu].core_id & 0xffff);
}
return core;
@@ -962,6 +1017,12 @@ static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus
return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
}
static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus,
struct cpu_map **diep)
{
return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
}
static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
struct cpu_map **corep)
{
@@ -973,6 +1034,11 @@ static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_un
{
return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
}
static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
struct cpu_map *map, int idx)
{
return perf_env__get_die(map, idx, &perf_stat.session->header.env);
}
static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
struct cpu_map *map, int idx)
@@ -992,6 +1058,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
}
stat_config.aggr_get_id = perf_stat__get_socket_file;
break;
case AGGR_DIE:
if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
perror("cannot build die map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_die_file;
break;
case AGGR_CORE:
if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
@@ -1542,6 +1615,8 @@ static int __cmd_report(int argc, const char **argv)
OPT_STRING('i', "input", &input_name, "file", "input file name"),
OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
"aggregate counts per processor die", AGGR_DIE),
OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,