Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar: "A bit larger than what I'd wish for - half of it is due to hw driver updates to Intel Ivy-Bridge which info got recently released, cycles:pp should work there now too, amongst other things. (but we are generally making exceptions for hardware enablement of this type.) There are also callchain fixes in it - responding to mostly theoretical (but valid) concerns. The tooling side sports perf.data endianness/portability fixes which did not make it for the merge window - and various other fixes as well." * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits) perf/x86: Check user address explicitly in copy_from_user_nmi() perf/x86: Check if user fp is valid perf: Limit callchains to 127 perf/x86: Allow multiple stacks perf/x86: Update SNB PEBS constraints perf/x86: Enable/Add IvyBridge hardware support perf/x86: Implement cycles:p for SNB/IVB perf/x86: Fix Intel shared extra MSR allocation x86/decoder: Fix bsr/bsf/jmpe decoding with operand-size prefix perf: Remove duplicate invocation on perf_event_for_each perf uprobes: Remove unnecessary check before strlist__delete perf symbols: Check for valid dso before creating map perf evsel: Fix 32 bit values endianity swap for sample_id_all header perf session: Handle endianity swap on sample_id_all header data perf symbols: Handle different endians properly during symbol load perf evlist: Pass third argument to ioctl explicitly perf tools: Update ioctl documentation for PERF_IOC_FLAG_GROUP perf tools: Make --version show kernel version instead of pull req tag perf tools: Check if callchain is corrupted perf callchain: Make callchain cursors TLS ...
This commit is contained in:
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
|
||||
if (!cpuc->shared_regs)
|
||||
goto error;
|
||||
}
|
||||
cpuc->is_fake = 1;
|
||||
return cpuc;
|
||||
error:
|
||||
free_fake_cpuc(cpuc);
|
||||
@@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
|
||||
dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
|
||||
}
|
||||
|
||||
/*
 * Check a user-space frame pointer before a callchain walker uses it.
 *
 * Returns 1 when __range_not_ok(fp, size, TASK_SIZE) yields 0, and 0
 * otherwise. Presumably that means the @size bytes starting at @fp lie
 * inside the user address range bounded by TASK_SIZE -- confirm against
 * the definition of __range_not_ok().
 *
 * The callchain loops visible in this diff stop walking ("break") as soon
 * as this returns 0 for the current frame pointer.
 */
static inline int
|
||||
valid_user_frame(const void __user *fp, unsigned long size)
|
||||
{
|
||||
return (__range_not_ok(fp, size, TASK_SIZE) == 0);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
|
||||
#include <asm/compat.h>
|
||||
@@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
|
||||
if (bytes != sizeof(frame))
|
||||
break;
|
||||
|
||||
if (fp < compat_ptr(regs->sp))
|
||||
if (!valid_user_frame(fp, sizeof(frame)))
|
||||
break;
|
||||
|
||||
perf_callchain_store(entry, frame.return_address);
|
||||
@@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
|
||||
if (bytes != sizeof(frame))
|
||||
break;
|
||||
|
||||
if ((unsigned long)fp < regs->sp)
|
||||
if (!valid_user_frame(fp, sizeof(frame)))
|
||||
break;
|
||||
|
||||
perf_callchain_store(entry, frame.return_address);
|
||||
|
@@ -117,6 +117,7 @@ struct cpu_hw_events {
|
||||
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
|
||||
|
||||
unsigned int group_flag;
|
||||
int is_fake;
|
||||
|
||||
/*
|
||||
* Intel DebugStore bits
|
||||
@@ -364,6 +365,7 @@ struct x86_pmu {
|
||||
int pebs_record_size;
|
||||
void (*drain_pebs)(struct pt_regs *regs);
|
||||
struct event_constraint *pebs_constraints;
|
||||
void (*pebs_aliases)(struct perf_event *event);
|
||||
|
||||
/*
|
||||
* Intel LBR
|
||||
|
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * NOTE(review): this region is a rendered diff hunk without +/- markers.
 * It appears to interleave the removed intel_try_alt_er(event, orig_idx)
 * with the two functions that replace it, intel_alt_er() and
 * intel_fixup_er(); as shown, the lines do not form compilable C. The
 * lines that read event->hw.extra_reg.idx, return false/true, or mention
 * orig_idx look like they belong to the removed function -- confirm
 * against the real patch.
 *
 * intel_alt_er(idx): returns idx unchanged unless ERF_HAS_RSP_1 is set in
 * x86_pmu.er_flags; with the flag set it maps EXTRA_REG_RSP_0 to
 * EXTRA_REG_RSP_1 and vice versa, and returns any other idx unchanged.
 * It takes only an index and does not touch any event.
 *
 * intel_fixup_er(event, idx): stores idx in event->hw.extra_reg.idx and,
 * for EXTRA_REG_RSP_0 / EXTRA_REG_RSP_1, clears the INTEL_ARCH_EVENT_MASK
 * bits of event->hw.config, ORs in 0x01b7 / 0x01bb respectively, and
 * points event->hw.extra_reg.reg at MSR_OFFCORE_RSP_0 / MSR_OFFCORE_RSP_1.
 */
static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
|
||||
static int intel_alt_er(int idx)
|
||||
{
|
||||
if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
|
||||
return false;
|
||||
return idx;
|
||||
|
||||
if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
|
||||
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
|
||||
event->hw.config |= 0x01bb;
|
||||
event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
|
||||
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
|
||||
} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
|
||||
if (idx == EXTRA_REG_RSP_0)
|
||||
return EXTRA_REG_RSP_1;
|
||||
|
||||
if (idx == EXTRA_REG_RSP_1)
|
||||
return EXTRA_REG_RSP_0;
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static void intel_fixup_er(struct perf_event *event, int idx)
|
||||
{
|
||||
event->hw.extra_reg.idx = idx;
|
||||
|
||||
if (idx == EXTRA_REG_RSP_0) {
|
||||
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
|
||||
event->hw.config |= 0x01b7;
|
||||
event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
|
||||
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
|
||||
} else if (idx == EXTRA_REG_RSP_1) {
|
||||
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
|
||||
event->hw.config |= 0x01bb;
|
||||
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
|
||||
}
|
||||
|
||||
if (event->hw.extra_reg.idx == orig_idx)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
|
||||
struct event_constraint *c = &emptyconstraint;
|
||||
struct er_account *era;
|
||||
unsigned long flags;
|
||||
int orig_idx = reg->idx;
|
||||
int idx = reg->idx;
|
||||
|
||||
/* already allocated shared msr */
|
||||
if (reg->alloc)
|
||||
/*
|
||||
* reg->alloc can be set due to existing state, so for fake cpuc we
|
||||
* need to ignore this, otherwise we might fail to allocate proper fake
|
||||
* state for this extra reg constraint. Also see the comment below.
|
||||
*/
|
||||
if (reg->alloc && !cpuc->is_fake)
|
||||
return NULL; /* call x86_get_event_constraint() */
|
||||
|
||||
again:
|
||||
era = &cpuc->shared_regs->regs[reg->idx];
|
||||
era = &cpuc->shared_regs->regs[idx];
|
||||
/*
|
||||
* we use spin_lock_irqsave() to avoid lockdep issues when
|
||||
* passing a fake cpuc
|
||||
@@ -1173,6 +1183,29 @@ again:
|
||||
|
||||
if (!atomic_read(&era->ref) || era->config == reg->config) {
|
||||
|
||||
/*
|
||||
* If its a fake cpuc -- as per validate_{group,event}() we
|
||||
* shouldn't touch event state and we can avoid doing so
|
||||
* since both will only call get_event_constraints() once
|
||||
* on each event, this avoids the need for reg->alloc.
|
||||
*
|
||||
* Not doing the ER fixup will only result in era->reg being
|
||||
* wrong, but since we won't actually try and program hardware
|
||||
* this isn't a problem either.
|
||||
*/
|
||||
if (!cpuc->is_fake) {
|
||||
if (idx != reg->idx)
|
||||
intel_fixup_er(event, idx);
|
||||
|
||||
/*
|
||||
* x86_schedule_events() can call get_event_constraints()
|
||||
* multiple times on events in the case of incremental
|
||||
* scheduling(). reg->alloc ensures we only do the ER
|
||||
* allocation once.
|
||||
*/
|
||||
reg->alloc = 1;
|
||||
}
|
||||
|
||||
/* lock in msr value */
|
||||
era->config = reg->config;
|
||||
era->reg = reg->reg;
|
||||
@@ -1180,17 +1213,17 @@ again:
|
||||
/* one more user */
|
||||
atomic_inc(&era->ref);
|
||||
|
||||
/* no need to reallocate during incremental event scheduling */
|
||||
reg->alloc = 1;
|
||||
|
||||
/*
|
||||
* need to call x86_get_event_constraint()
|
||||
* to check if associated event has constraints
|
||||
*/
|
||||
c = NULL;
|
||||
} else if (intel_try_alt_er(event, orig_idx)) {
|
||||
raw_spin_unlock_irqrestore(&era->lock, flags);
|
||||
goto again;
|
||||
} else {
|
||||
idx = intel_alt_er(idx);
|
||||
if (idx != reg->idx) {
|
||||
raw_spin_unlock_irqrestore(&era->lock, flags);
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&era->lock, flags);
|
||||
|
||||
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
|
||||
struct er_account *era;
|
||||
|
||||
/*
|
||||
* only put constraint if extra reg was actually
|
||||
* allocated. Also takes care of event which do
|
||||
* not use an extra shared reg
|
||||
* Only put constraint if extra reg was actually allocated. Also takes
|
||||
* care of event which do not use an extra shared reg.
|
||||
*
|
||||
* Also, if this is a fake cpuc we shouldn't touch any event state
|
||||
* (reg->alloc) and we don't care about leaving inconsistent cpuc state
|
||||
* either since it'll be thrown out.
|
||||
*/
|
||||
if (!reg->alloc)
|
||||
if (!reg->alloc || cpuc->is_fake)
|
||||
return;
|
||||
|
||||
era = &cpuc->shared_regs->regs[reg->idx];
|
||||
@@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
|
||||
intel_put_shared_regs_event_constraints(cpuc, event);
|
||||
}
|
||||
|
||||
static int intel_pmu_hw_config(struct perf_event *event)
|
||||
static void intel_pebs_aliases_core2(struct perf_event *event)
|
||||
{
|
||||
int ret = x86_pmu_hw_config(event);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (event->attr.precise_ip &&
|
||||
(event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
|
||||
if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
|
||||
/*
|
||||
* Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
|
||||
* (0x003c) so that we can use it with PEBS.
|
||||
@@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||
*/
|
||||
u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
|
||||
|
||||
alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
|
||||
event->hw.config = alt_config;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * PEBS alias callback assigned to x86_pmu.pebs_aliases by the SNB/IVB
 * setup lines visible in this diff.
 *
 * When the X86_RAW_EVENT_MASK bits of event->hw.config equal 0x003c, they
 * are replaced by the event=0xc2, umask=0x01, inv=1, cmask=16 encoding;
 * every bit outside X86_RAW_EVENT_MASK is carried over unchanged. Events
 * with any other raw encoding are left untouched.
 *
 * This function does not look at event->attr.precise_ip itself; the
 * intel_pmu_hw_config() lines visible in this diff test it before
 * invoking the callback.
 */
static void intel_pebs_aliases_snb(struct perf_event *event)
|
||||
{
|
||||
if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
|
||||
/*
|
||||
* Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
|
||||
* (0x003c) so that we can use it with PEBS.
|
||||
*
|
||||
* The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
|
||||
* PEBS capable. However we can use UOPS_RETIRED.ALL
|
||||
* (0x01c2), which is a PEBS capable event, to get the same
|
||||
* count.
|
||||
*
|
||||
* UOPS_RETIRED.ALL counts the number of cycles that retires
|
||||
* CNTMASK micro-ops. By setting CNTMASK to a value (16)
|
||||
* larger than the maximum number of micro-ops that can be
|
||||
* retired per cycle (4) and then inverting the condition, we
|
||||
* count all cycles that retire 16 or less micro-ops, which
|
||||
* is every cycle.
|
||||
*
|
||||
* Thereby we gain a PEBS capable cycle counter.
|
||||
*/
|
||||
u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
|
||||
|
||||
alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
|
||||
event->hw.config = alt_config;
|
||||
}
|
||||
}
|
||||
|
||||
static int intel_pmu_hw_config(struct perf_event *event)
|
||||
{
|
||||
int ret = x86_pmu_hw_config(event);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (event->attr.precise_ip && x86_pmu.pebs_aliases)
|
||||
x86_pmu.pebs_aliases(event);
|
||||
|
||||
if (intel_pmu_needs_lbr_smpl(event)) {
|
||||
ret = intel_pmu_setup_lbr_filter(event);
|
||||
@@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = {
|
||||
.max_period = (1ULL << 31) - 1,
|
||||
.get_event_constraints = intel_get_event_constraints,
|
||||
.put_event_constraints = intel_put_event_constraints,
|
||||
.pebs_aliases = intel_pebs_aliases_core2,
|
||||
|
||||
.format_attrs = intel_arch3_formats_attr,
|
||||
|
||||
@@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void)
|
||||
break;
|
||||
|
||||
case 42: /* SandyBridge */
|
||||
x86_add_quirk(intel_sandybridge_quirk);
|
||||
case 45: /* SandyBridge, "Romely-EP" */
|
||||
x86_add_quirk(intel_sandybridge_quirk);
|
||||
case 58: /* IvyBridge */
|
||||
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
|
||||
sizeof(hw_cache_event_ids));
|
||||
|
||||
@@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void)
|
||||
|
||||
x86_pmu.event_constraints = intel_snb_event_constraints;
|
||||
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
|
||||
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
|
||||
x86_pmu.extra_regs = intel_snb_extra_regs;
|
||||
/* all extra regs are per-cpu when HT is on */
|
||||
x86_pmu.er_flags |= ERF_HAS_RSP_1;
|
||||
|
@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
|
||||
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
|
||||
INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
|
||||
INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
|
||||
INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
|
||||
INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
|
||||
INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
|
||||
INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
|
||||
INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
|
||||
INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
|
||||
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
|
||||
INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
|
||||
|
Reference in New Issue
Block a user