Merge tag 'perf-core-for-mingo-20160606' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- Tooling support for TopDown counters, recently added to the kernel (Andi Kleen)

- Show call graphs in 'perf script' when 1st event doesn't have it but some other has (He Kuang)

- Fix terminal cleanup when handling invalid .perfconfig files in 'perf top' (Taeung Song)

Build fixes:

- Respect CROSS_COMPILE for the linker in libapi (Lucas Stach)

Infrastructure changes:

- Fix perf_evlist__alloc_mmap() failure path (Wang Nan)

- Provide way to extract integer value from format_field (Arnaldo Carvalho de Melo)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2016-06-08 09:29:23 +02:00
16 changed files with 441 additions and 46 deletions

View File

@@ -275,7 +275,8 @@ static int perf_parse_file(config_fn_t fn, void *data)
break;
}
}
die("bad config file line %d in %s", config_linenr, config_file_name);
pr_err("bad config file line %d in %s\n", config_linenr, config_file_name);
return -1;
}
static int parse_unit_factor(const char *end, unsigned long *val)
@@ -479,16 +480,15 @@ static int perf_config_global(void)
int perf_config(config_fn_t fn, void *data)
{
int ret = 0, found = 0;
int ret = -1;
const char *home = NULL;
/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
if (config_exclusive_filename)
return perf_config_from_file(fn, config_exclusive_filename, data);
if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
ret += perf_config_from_file(fn, perf_etc_perfconfig(),
data);
found += 1;
if (perf_config_from_file(fn, perf_etc_perfconfig(), data) < 0)
goto out;
}
home = getenv("HOME");
@@ -514,14 +514,12 @@ int perf_config(config_fn_t fn, void *data)
if (!st.st_size)
goto out_free;
ret += perf_config_from_file(fn, user_config, data);
found += 1;
ret = perf_config_from_file(fn, user_config, data);
out_free:
free(user_config);
}
out:
if (found == 0)
return -1;
return ret;
}
@@ -609,8 +607,12 @@ static int collect_config(const char *var, const char *value,
struct perf_config_section *section = NULL;
struct perf_config_item *item = NULL;
struct perf_config_set *set = perf_config_set;
struct list_head *sections = &set->sections;
struct list_head *sections;
if (set == NULL)
return -1;
sections = &set->sections;
key = ptr = strdup(var);
if (!key) {
pr_debug("%s: strdup failed\n", __func__);

View File

@@ -946,9 +946,12 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
if (cpu_map__empty(evlist->cpus))
evlist->nr_mmaps = thread_map__nr(evlist->threads);
evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
if (!evlist->mmap)
return -ENOMEM;
for (i = 0; i < evlist->nr_mmaps; i++)
evlist->mmap[i].fd = -1;
return evlist->mmap != NULL ? 0 : -ENOMEM;
return 0;
}
struct mmap_params {

View File

@@ -2251,17 +2251,11 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
return sample->raw_data + offset;
}
u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
const char *name)
u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
bool needs_swap)
{
struct format_field *field = perf_evsel__field(evsel, name);
void *ptr;
u64 value;
if (!field)
return 0;
ptr = sample->raw_data + field->offset;
void *ptr = sample->raw_data + field->offset;
switch (field->size) {
case 1:
@@ -2279,7 +2273,7 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
return 0;
}
if (!evsel->needs_swap)
if (!needs_swap)
return value;
switch (field->size) {
@@ -2296,6 +2290,17 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
return 0;
}
/*
 * Look up the tracepoint field called @name on @evsel and return its integer
 * value as decoded from @sample by format_field__intval(), passing along the
 * evsel's needs_swap flag.
 *
 * Returns 0 when perf_evsel__field() finds no such field.
 */
u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
		       const char *name)
{
	struct format_field *field = perf_evsel__field(evsel, name);

	/* Unknown field name: nothing to decode. */
	if (!field)
		return 0;

	return format_field__intval(field, sample, evsel->needs_swap);
}
bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
char *msg, size_t msgsize)
{

View File

@@ -261,6 +261,8 @@ static inline char *perf_evsel__strval(struct perf_evsel *evsel,
struct format_field;
u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap);
struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
#define perf_evsel__match(evsel, t, c) \

7
tools/perf/util/group.h Normal file
View File

@@ -0,0 +1,7 @@
#ifndef GROUP_H
#define GROUP_H 1

/* The prototypes below use bool, so pull it in to keep this header self-contained. */
#include <stdbool.h>

/*
 * Architecture hooks for TopDown event groups.
 * NOTE(review): the implementations are not visible from this header; confirm
 * the meaning of the return value and of *warn against the arch code.
 */
bool arch_topdown_check_group(bool *warn);
void arch_topdown_group_warn(void);

#endif

View File

@@ -260,6 +260,7 @@ cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
L1-dcache|l1-d|l1d|L1-data |
L1-icache|l1-i|l1i|L1-instruction |

View File

@@ -36,6 +36,11 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static bool have_frontend_stalled;
struct stats walltime_nsecs_stats;
@@ -82,6 +87,11 @@ void perf_stat__reset_shadow_stats(void)
sizeof(runtime_transaction_stats));
memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
}
/*
@@ -105,6 +115,16 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, ELISION_START))
update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]);
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
@@ -302,6 +322,107 @@ static void print_ll_cache_misses(int cpu,
out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}
/*
* High-level "TopDown" breakdown of CPU core pipeline bottlenecks.
*
* Basic concept following
* Yasin, A Top Down Method for Performance analysis and Counter architecture
* ISPASS14
*
* The CPU pipeline is divided into 4 areas that can be bottlenecks:
*
* Frontend -> Backend -> Retiring
* BadSpeculation in addition means out of order execution that is thrown away
* (for example branch mispredictions)
* Frontend is instruction decoding.
* Backend is execution, like computation and accessing data in memory
* Retiring is good execution that is not directly bottlenecked
*
* The formulas are computed in slots.
* A slot is an entry in the pipeline; each cycle provides as many slots as the
* pipeline is wide (for example a 4-wide pipeline has 4 slots for each cycle)
*
* Formulas:
* BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
* TotalSlots
* Retiring = SlotsRetired / TotalSlots
* FrontendBound = FetchBubbles / TotalSlots
* BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
*
* The kernel provides the mapping to the low level CPU events and any scaling
* needed for the CPU pipeline width, for example:
*
* TotalSlots = Cycles * 4
*
* The scaling factor is communicated in the sysfs unit.
*
* In some cases the CPU may not be able to measure all the formulas due to
* missing events. In this case multiple formulas are combined, as possible.
*
* Full TopDown supports more levels to sub-divide each area: for example
* BackendBound into computing bound and memory bound. For now we only
* support Level 1 TopDown.
*/
/*
 * Map slightly negative values onto zero.
 *
 * Anything in the range [-0.02, 0) is returned as 0.0; every other input is
 * returned unchanged. Presumably this hides small negative ratios caused by
 * measurement noise -- the code itself does not state the reason.
 */
static double sanitize_val(double x)
{
	return (x >= -0.02 && x < 0) ? 0.0 : x;
}
/* Return avg_stats() of the topdown total-slots entry for (@ctx, @cpu). */
static double td_total_slots(int ctx, int cpu)
{
	double slots = avg_stats(&runtime_topdown_total_slots[ctx][cpu]);

	return slots;
}
/*
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) / TotalSlots
 *
 * Computed from the averaged stats for (@ctx, @cpu). Yields 0 when the total
 * slot count is zero; the result is passed through sanitize_val().
 */
static double td_bad_spec(int ctx, int cpu)
{
	double issued = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]);
	double retired = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
	double recovery = avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
	double wasted = issued - retired + recovery;
	double slots = td_total_slots(ctx, cpu);
	/* Avoid dividing by zero when no total-slots data was recorded. */
	double ratio = slots ? wasted / slots : 0;

	return sanitize_val(ratio);
}
/*
 * Retiring = SlotsRetired / TotalSlots
 *
 * Computed from the averaged stats for (@ctx, @cpu); 0 when the total slot
 * count is zero.
 */
static double td_retiring(int ctx, int cpu)
{
	double slots = td_total_slots(ctx, cpu);
	double retired = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);

	if (!slots)
		return 0;

	return retired / slots;
}
/*
 * FrontendBound = FetchBubbles / TotalSlots
 *
 * Computed from the averaged stats for (@ctx, @cpu); 0 when the total slot
 * count is zero.
 */
static double td_fe_bound(int ctx, int cpu)
{
	double slots = td_total_slots(ctx, cpu);
	double bubbles = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);

	if (!slots)
		return 0;

	return bubbles / slots;
}
/*
 * BackendBound = 1.0 - FrontendBound - BadSpeculation - Retiring
 *
 * When the three other fractions add up to exactly zero the result is 0
 * rather than 1.0 (presumably so that missing topdown data is not shown as
 * fully backend bound -- confirm). Otherwise the remainder is passed through
 * sanitize_val().
 */
static double td_be_bound(int ctx, int cpu)
{
	double accounted = td_fe_bound(ctx, cpu) +
			   td_bad_spec(ctx, cpu) +
			   td_retiring(ctx, cpu);

	return accounted == 0 ? 0 : sanitize_val(1.0 - accounted);
}
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
struct perf_stat_output_ctx *out)
@@ -309,6 +430,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
void *ctxp = out->ctx;
print_metric_t print_metric = out->print_metric;
double total, ratio = 0.0, total2;
const char *color = NULL;
int ctx = evsel_context(evsel);
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
@@ -452,6 +574,46 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
avg / ratio);
else
print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
double fe_bound = td_fe_bound(ctx, cpu);
if (fe_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
double retiring = td_retiring(ctx, cpu);
if (retiring > 0.7)
color = PERF_COLOR_GREEN;
print_metric(ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
double bad_spec = td_bad_spec(ctx, cpu);
if (bad_spec > 0.1)
color = PERF_COLOR_RED;
print_metric(ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
double be_bound = td_be_bound(ctx, cpu);
const char *name = "backend bound";
static int have_recovery_bubbles = -1;
/* In case the CPU does not support topdown-recovery-bubbles */
if (have_recovery_bubbles < 0)
have_recovery_bubbles = pmu_have_event("cpu",
"topdown-recovery-bubbles");
if (!have_recovery_bubbles)
name = "backend bound/bad spec";
if (be_bound > 0.2)
color = PERF_COLOR_RED;
if (td_total_slots(ctx, cpu) > 0)
print_metric(ctxp, color, "%8.1f%%", name,
be_bound * 100.);
else
print_metric(ctxp, NULL, NULL, name, 0);
} else if (runtime_nsecs_stats[cpu].n != 0) {
char unit = 'M';
char unit_buf[10];

View File

@@ -79,6 +79,11 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
ID(TRANSACTION_START, cpu/tx-start/),
ID(ELISION_START, cpu/el-start/),
ID(CYCLES_IN_TX_CP, cpu/cycles-ct/),
ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
};
#undef ID

View File

@@ -17,6 +17,11 @@ enum perf_stat_evsel_id {
PERF_STAT_EVSEL_ID__TRANSACTION_START,
PERF_STAT_EVSEL_ID__ELISION_START,
PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
PERF_STAT_EVSEL_ID__MAX,
};