Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
 "Kernel side changes mostly consist of work on x86 PMU drivers:

   - x86 Intel PT (hardware CPU tracer) improvements (Alexander Shishkin)
   - x86 Intel CQM (cache quality monitoring) improvements (Thomas Gleixner)
   - x86 Intel PEBSv3 support (Peter Zijlstra)
   - x86 Intel PEBS interrupt batching support for lower overhead sampling (Zheng Yan, Kan Liang)
   - x86 PMU scheduler fixes and improvements (Peter Zijlstra)

  There are too many tooling improvements to list them all - here are a few select highlights:

  'perf bench':

   - Introduce new 'perf bench futex' benchmark: 'wake-parallel', to measure parallel waker threads generating contention for kernel locks (hb->lock). (Davidlohr Bueso)

  'perf top', 'perf report':

   - Allow disabling/enabling events dynamically in 'perf top': a 'perf top' session can instantly become a 'perf report' one, i.e. going from dynamic analysis to a static one, and back again. To toggle the modes, just press 'f' to 'freeze/unfreeze' the sampling. (Arnaldo Carvalho de Melo)

   - Make Ctrl-C stop processing on TUI, allowing interrupting the load of big perf.data files (Namhyung Kim)

  'perf probe': (Masami Hiramatsu)

   - Support glob wildcards for function name
   - Support $params special probe argument: collect all function arguments
   - Make --line checks validate C-style function name
   - Add --no-inlines option to avoid searching inline functions
   - Greatly speed up 'perf probe --list' by caching debuginfo
   - Improve --filter support for 'perf probe', allowing its arguments to be used with other commands, such as --add, --del, etc.

  'perf sched':

   - Add option in 'perf sched' to merge like comms to lat output (Josef Bacik)

  Plus tons of infrastructure work - in particular preparation for upcoming threaded perf report support, but also lots of other work - and fixes and other improvements. See (much) more details in the shortlog and in the git log"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (305 commits)
  perf tools: Configurable per thread proc map processing time out
  perf tools: Add time out to force stop proc map processing
  perf report: Fix sort__sym_cmp to also compare end of symbol
  perf hists browser: React to unassigned hotkey pressing
  perf top: Tell the user how to unfreeze events after pressing 'f'
  perf hists browser: Honour the help line provided by builtin-{top,report}.c
  perf hists browser: Do not exit when 'f' is pressed in 'report' mode
  perf top: Replace CTRL+z with 'f' as hotkey for enable/disable events
  perf annotate: Rename source_line_percent to source_line_samples
  perf annotate: Display total number of samples with --show-total-period
  perf tools: Ensure thread-stack is flushed
  perf top: Allow disabling/enabling events dynamicly
  perf evlist: Add toggle_enable() method
  perf trace: Fix race condition at the end of started workloads
  perf probe: Speed up perf probe --list by caching debuginfo
  perf probe: Show usage even if the last event is skipped
  perf tools: Move libtraceevent dynamic list to separated LDFLAGS variable
  perf tools: Fix a problem when opening old perf.data with different byte order
  perf tools: Ignore .config-detected in .gitignore
  perf probe: Fix to return error if no probe is added
  ...
@@ -881,10 +881,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (x86_pmu.commit_scheduling)
x86_pmu.commit_scheduling(cpuc, i, assign[i]);
}
}

if (!assign || unsched) {

} else {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
/*

@@ -1097,13 +1094,16 @@ int x86_perf_event_set_period(struct perf_event *event)
per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

/*
* The hw event starts counting from this event offset,
* mark it to be able to extra future deltas:
*/
local64_set(&hwc->prev_count, (u64)-left);
if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
local64_read(&hwc->prev_count) != (u64)-left) {
/*
* The hw event starts counting from this event offset,
* mark it to be able to extra future deltas:
*/
local64_set(&hwc->prev_count, (u64)-left);

wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
}

/*
* Due to erratum on certan cpu we need

@@ -75,6 +75,8 @@ struct event_constraint {
#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */

struct amd_nb {

@@ -87,6 +89,18 @@ struct amd_nb {
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 8

/*
* Flags PEBS can handle without an PMI.
*
* TID can only be handled by flushing at context switch.
*
*/
#define PEBS_FREERUNNING_FLAGS \
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
PERF_SAMPLE_TRANSACTION)

/*
* A debug store configuration.
*

@@ -133,7 +147,6 @@ enum intel_excl_state_type {
};

struct intel_excl_states {
enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
enum intel_excl_state_type state[X86_PMC_IDX_MAX];
bool sched_started; /* true if scheduling has started */
};

@@ -527,10 +540,10 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);

void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);

void (*start_scheduling)(struct cpu_hw_events *cpuc);

void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);

void (*stop_scheduling)(struct cpu_hw_events *cpuc);

struct event_constraint *event_constraints;

@@ -866,6 +879,8 @@ void intel_pmu_pebs_enable_all(void);

void intel_pmu_pebs_disable_all(void);

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);

void intel_ds_init(void);

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);

@@ -1903,9 +1903,8 @@ static void
intel_start_scheduling(struct cpu_hw_events *cpuc)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct intel_excl_states *xl, *xlo;
struct intel_excl_states *xl;
int tid = cpuc->excl_thread_id;
int o_tid = 1 - tid; /* sibling thread */

/*
* nothing needed if in group validation mode

@@ -1916,10 +1915,9 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
/*
* no exclusion needed
*/
if (!excl_cntrs)
if (WARN_ON_ONCE(!excl_cntrs))
return;

xlo = &excl_cntrs->states[o_tid];
xl = &excl_cntrs->states[tid];

xl->sched_started = true;

@@ -1928,22 +1926,41 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
* in stop_event_scheduling()
* makes scheduling appear as a transaction
*/
WARN_ON_ONCE(!irqs_disabled());
raw_spin_lock(&excl_cntrs->lock);
}

/*
* save initial state of sibling thread
*/
memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct event_constraint *c = cpuc->event_constraint[idx];
struct intel_excl_states *xl;
int tid = cpuc->excl_thread_id;

if (cpuc->is_fake || !is_ht_workaround_enabled())
return;

if (WARN_ON_ONCE(!excl_cntrs))
return;

if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
return;

xl = &excl_cntrs->states[tid];

lockdep_assert_held(&excl_cntrs->lock);

if (c->flags & PERF_X86_EVENT_EXCL)
xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
else
xl->state[cntr] = INTEL_EXCL_SHARED;
}

static void
intel_stop_scheduling(struct cpu_hw_events *cpuc)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct intel_excl_states *xl, *xlo;
struct intel_excl_states *xl;
int tid = cpuc->excl_thread_id;
int o_tid = 1 - tid; /* sibling thread */

/*
* nothing needed if in group validation mode

@@ -1953,17 +1970,11 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
/*
* no exclusion needed
*/
if (!excl_cntrs)
if (WARN_ON_ONCE(!excl_cntrs))
return;

xlo = &excl_cntrs->states[o_tid];
xl = &excl_cntrs->states[tid];

/*
* make new sibling thread state visible
*/
memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));

xl->sched_started = false;
/*
* release shared state lock (acquired in intel_start_scheduling())

@@ -1975,12 +1986,10 @@ static struct event_constraint *
intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
int idx, struct event_constraint *c)
{
struct event_constraint *cx;
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct intel_excl_states *xl, *xlo;
int is_excl, i;
struct intel_excl_states *xlo;
int tid = cpuc->excl_thread_id;
int o_tid = 1 - tid; /* alternate */
int is_excl, i;

/*
* validating a group does not require

@@ -1992,8 +2001,51 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
/*
* no exclusion needed
*/
if (!excl_cntrs)
if (WARN_ON_ONCE(!excl_cntrs))
return c;

/*
* because we modify the constraint, we need
* to make a copy. Static constraints come
* from static const tables.
*
* only needed when constraint has not yet
* been cloned (marked dynamic)
*/
if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
struct event_constraint *cx;

/*
* grab pre-allocated constraint entry
*/
cx = &cpuc->constraint_list[idx];

/*
* initialize dynamic constraint
* with static constraint
*/
*cx = *c;

/*
* mark constraint as dynamic, so we
* can free it later on
*/
cx->flags |= PERF_X86_EVENT_DYNAMIC;
c = cx;
}

/*
* From here on, the constraint is dynamic.
* Either it was just allocated above, or it
* was allocated during a earlier invocation
* of this function
*/

/*
* state of sibling HT
*/
xlo = &excl_cntrs->states[tid ^ 1];

/*
* event requires exclusive counter access
* across HT threads

@@ -2005,54 +2057,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
}

/*
* xl = state of current HT
* xlo = state of sibling HT
*/
xl = &excl_cntrs->states[tid];
xlo = &excl_cntrs->states[o_tid];

cx = c;

/*
* because we modify the constraint, we need
* to make a copy. Static constraints come
* from static const tables.
*
* only needed when constraint has not yet
* been cloned (marked dynamic)
*/
if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {

/* sanity check */
if (idx < 0)
return &emptyconstraint;

/*
* grab pre-allocated constraint entry
*/
cx = &cpuc->constraint_list[idx];

/*
* initialize dynamic constraint
* with static constraint
*/
memcpy(cx, c, sizeof(*cx));

/*
* mark constraint as dynamic, so we
* can free it later on
*/
cx->flags |= PERF_X86_EVENT_DYNAMIC;
}

/*
* From here on, the constraint is dynamic.
* Either it was just allocated above, or it
* was allocated during a earlier invocation
* of this function
*/

/*
* Modify static constraint with current dynamic
* state of thread

@@ -2061,37 +2065,37 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* SHARED : sibling counter measuring non-exclusive event
* UNUSED : sibling counter unused
*/
for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
/*
* exclusive event in sibling counter
* our corresponding counter cannot be used
* regardless of our event
*/
if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
__clear_bit(i, cx->idxmsk);
if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
__clear_bit(i, c->idxmsk);
/*
* if measuring an exclusive event, sibling
* measuring non-exclusive, then counter cannot
* be used
*/
if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
__clear_bit(i, cx->idxmsk);
if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
__clear_bit(i, c->idxmsk);
}

/*
* recompute actual bit weight for scheduling algorithm
*/
cx->weight = hweight64(cx->idxmsk64);
c->weight = hweight64(c->idxmsk64);

/*
* if we return an empty mask, then switch
* back to static empty constraint to avoid
* the cost of freeing later on
*/
if (cx->weight == 0)
cx = &emptyconstraint;
if (c->weight == 0)
c = &emptyconstraint;

return cx;
return c;
}

static struct event_constraint *

@@ -2124,10 +2128,8 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
{
struct hw_perf_event *hwc = &event->hw;
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct intel_excl_states *xlo, *xl;
unsigned long flags = 0; /* keep compiler happy */
int tid = cpuc->excl_thread_id;
int o_tid = 1 - tid;
struct intel_excl_states *xl;

/*
* nothing needed if in group validation mode

@@ -2135,13 +2137,9 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
if (cpuc->is_fake)
return;

WARN_ON_ONCE(!excl_cntrs);

if (!excl_cntrs)
if (WARN_ON_ONCE(!excl_cntrs))
return;

xl = &excl_cntrs->states[tid];
xlo = &excl_cntrs->states[o_tid];
if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
if (!--cpuc->n_excl)

@@ -2149,22 +2147,25 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
}

/*
* put_constraint may be called from x86_schedule_events()
* which already has the lock held so here make locking
* conditional
* If event was actually assigned, then mark the counter state as
* unused now.
*/
if (!xl->sched_started)
raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
if (hwc->idx >= 0) {
xl = &excl_cntrs->states[tid];

/*
* if event was actually assigned, then mark the
* counter state as unused now
*/
if (hwc->idx >= 0)
xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
/*
* put_constraint may be called from x86_schedule_events()
* which already has the lock held so here make locking
* conditional.
*/
if (!xl->sched_started)
raw_spin_lock(&excl_cntrs->lock);

if (!xl->sched_started)
raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
xl->state[hwc->idx] = INTEL_EXCL_UNUSED;

if (!xl->sched_started)
raw_spin_unlock(&excl_cntrs->lock);
}
}

static void

@@ -2196,41 +2197,6 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
intel_put_excl_constraints(cpuc, event);
}

static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct event_constraint *c = cpuc->event_constraint[idx];
struct intel_excl_states *xlo, *xl;
int tid = cpuc->excl_thread_id;
int o_tid = 1 - tid;
int is_excl;

if (cpuc->is_fake || !c)
return;

is_excl = c->flags & PERF_X86_EVENT_EXCL;

if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
return;

WARN_ON_ONCE(!excl_cntrs);

if (!excl_cntrs)
return;

xl = &excl_cntrs->states[tid];
xlo = &excl_cntrs->states[o_tid];

WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));

if (cntr >= 0) {
if (is_excl)
xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
else
xlo->init_state[cntr] = INTEL_EXCL_SHARED;
}
}

static void intel_pebs_aliases_core2(struct perf_event *event)
{
if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {

@@ -2294,8 +2260,15 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;

if (event->attr.precise_ip && x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event);
if (event->attr.precise_ip) {
if (!event->attr.freq) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
}
if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event);
}

if (needs_branch_stack(event)) {
ret = intel_pmu_setup_lbr_filter(event);

@@ -2544,19 +2517,11 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
{
struct intel_excl_cntrs *c;
int i;

c = kzalloc_node(sizeof(struct intel_excl_cntrs),
GFP_KERNEL, cpu_to_node(cpu));
if (c) {
raw_spin_lock_init(&c->lock);
for (i = 0; i < X86_PMC_IDX_MAX; i++) {
c->states[0].state[i] = INTEL_EXCL_UNUSED;
c->states[0].init_state[i] = INTEL_EXCL_UNUSED;

c->states[1].state[i] = INTEL_EXCL_UNUSED;
c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
}
c->core_id = -1;
}
return c;

@@ -2677,6 +2642,15 @@ static void intel_pmu_cpu_dying(int cpu)
fini_debug_store_on_cpu(cpu);
}

static void intel_pmu_sched_task(struct perf_event_context *ctx,
bool sched_in)
{
if (x86_pmu.pebs_active)
intel_pmu_pebs_sched_task(ctx, sched_in);
if (x86_pmu.lbr_nr)
intel_pmu_lbr_sched_task(ctx, sched_in);
}

PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");

PMU_FORMAT_ATTR(ldlat, "config1:0-15");

@@ -2766,7 +2740,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_lbr_sched_task,
.sched_task = intel_pmu_sched_task,
};

static __init void intel_clovertown_quirk(void)

@@ -2939,8 +2913,8 @@ static __init void intel_ht_bug(void)
{
x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;

x86_pmu.commit_scheduling = intel_commit_scheduling;
x86_pmu.start_scheduling = intel_start_scheduling;
x86_pmu.commit_scheduling = intel_commit_scheduling;
x86_pmu.stop_scheduling = intel_stop_scheduling;
}

@@ -3396,8 +3370,8 @@ static __init int fixup_ht_bug(void)

x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);

x86_pmu.commit_scheduling = NULL;
x86_pmu.start_scheduling = NULL;
x86_pmu.commit_scheduling = NULL;
x86_pmu.stop_scheduling = NULL;

watchdog_nmi_enable_all();
@@ -13,16 +13,35 @@
|
||||
#define MSR_IA32_QM_CTR 0x0c8e
|
||||
#define MSR_IA32_QM_EVTSEL 0x0c8d
|
||||
|
||||
static unsigned int cqm_max_rmid = -1;
|
||||
static u32 cqm_max_rmid = -1;
|
||||
static unsigned int cqm_l3_scale; /* supposedly cacheline size */
|
||||
|
||||
struct intel_cqm_state {
|
||||
raw_spinlock_t lock;
|
||||
int rmid;
|
||||
int cnt;
|
||||
/**
|
||||
* struct intel_pqr_state - State cache for the PQR MSR
|
||||
* @rmid: The cached Resource Monitoring ID
|
||||
* @closid: The cached Class Of Service ID
|
||||
* @rmid_usecnt: The usage counter for rmid
|
||||
*
|
||||
* The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
|
||||
* lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
|
||||
* contains both parts, so we need to cache them.
|
||||
*
|
||||
* The cache also helps to avoid pointless updates if the value does
|
||||
* not change.
|
||||
*/
|
||||
struct intel_pqr_state {
|
||||
u32 rmid;
|
||||
u32 closid;
|
||||
int rmid_usecnt;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
|
||||
/*
|
||||
* The cached intel_pqr_state is strictly per CPU and can never be
|
||||
* updated from a remote CPU. Both functions which modify the state
|
||||
* (intel_cqm_event_start and intel_cqm_event_stop) are called with
|
||||
* interrupts disabled, which is sufficient for the protection.
|
||||
*/
|
||||
static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
|
||||
|
||||
/*
|
||||
* Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
|
||||
@@ -57,7 +76,7 @@ static cpumask_t cqm_cpumask;
|
||||
* near-zero occupancy value, i.e. no cachelines are tagged with this
|
||||
* RMID, once __intel_cqm_rmid_rotate() returns.
|
||||
*/
|
||||
static unsigned int intel_cqm_rotation_rmid;
|
||||
static u32 intel_cqm_rotation_rmid;
|
||||
|
||||
#define INVALID_RMID (-1)
|
||||
|
||||
@@ -69,7 +88,7 @@ static unsigned int intel_cqm_rotation_rmid;
|
||||
* Likewise, an rmid value of -1 is used to indicate "no rmid currently
|
||||
* assigned" and is used as part of the rotation code.
|
||||
*/
|
||||
static inline bool __rmid_valid(unsigned int rmid)
|
||||
static inline bool __rmid_valid(u32 rmid)
|
||||
{
|
||||
if (!rmid || rmid == INVALID_RMID)
|
||||
return false;
|
||||
@@ -77,7 +96,7 @@ static inline bool __rmid_valid(unsigned int rmid)
|
||||
return true;
|
||||
}
|
||||
|
||||
static u64 __rmid_read(unsigned int rmid)
|
||||
static u64 __rmid_read(u32 rmid)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
@@ -102,7 +121,7 @@ enum rmid_recycle_state {
|
||||
};
|
||||
|
||||
struct cqm_rmid_entry {
|
||||
unsigned int rmid;
|
||||
u32 rmid;
|
||||
enum rmid_recycle_state state;
|
||||
struct list_head list;
|
||||
unsigned long queue_time;
|
||||
@@ -147,7 +166,7 @@ static LIST_HEAD(cqm_rmid_limbo_lru);
|
||||
*/
|
||||
static struct cqm_rmid_entry **cqm_rmid_ptrs;
|
||||
|
||||
static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
|
||||
static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
|
||||
{
|
||||
struct cqm_rmid_entry *entry;
|
||||
|
||||
@@ -162,7 +181,7 @@ static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
|
||||
*
|
||||
* We expect to be called with cache_mutex held.
|
||||
*/
|
||||
static int __get_rmid(void)
|
||||
static u32 __get_rmid(void)
|
||||
{
|
||||
struct cqm_rmid_entry *entry;
|
||||
|
||||
@@ -177,7 +196,7 @@ static int __get_rmid(void)
|
||||
return entry->rmid;
|
||||
}
|
||||
|
||||
static void __put_rmid(unsigned int rmid)
|
||||
static void __put_rmid(u32 rmid)
|
||||
{
|
||||
struct cqm_rmid_entry *entry;
|
||||
|
||||
@@ -372,7 +391,7 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b)
|
||||
}
|
||||
|
||||
struct rmid_read {
|
||||
unsigned int rmid;
|
||||
u32 rmid;
|
||||
atomic64_t value;
|
||||
};
|
||||
|
||||
@@ -381,12 +400,11 @@ static void __intel_cqm_event_count(void *info);
|
||||
/*
|
||||
* Exchange the RMID of a group of events.
|
||||
*/
|
||||
static unsigned int
|
||||
intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid)
|
||||
static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
|
||||
{
|
||||
struct perf_event *event;
|
||||
unsigned int old_rmid = group->hw.cqm_rmid;
|
||||
struct list_head *head = &group->hw.cqm_group_entry;
|
||||
u32 old_rmid = group->hw.cqm_rmid;
|
||||
|
||||
lockdep_assert_held(&cache_mutex);
|
||||
|
||||
@@ -451,7 +469,7 @@ static void intel_cqm_stable(void *arg)
|
||||
* If we have group events waiting for an RMID that don't conflict with
|
||||
* events already running, assign @rmid.
|
||||
*/
|
||||
static bool intel_cqm_sched_in_event(unsigned int rmid)
|
||||
static bool intel_cqm_sched_in_event(u32 rmid)
|
||||
{
|
||||
struct perf_event *leader, *event;
|
||||
|
||||
@@ -598,7 +616,7 @@ static bool intel_cqm_rmid_stabilize(unsigned int *available)
|
||||
static void __intel_cqm_pick_and_rotate(struct perf_event *next)
|
||||
{
|
||||
struct perf_event *rotor;
|
||||
unsigned int rmid;
|
||||
u32 rmid;
|
||||
|
||||
lockdep_assert_held(&cache_mutex);
|
||||
|
||||
@@ -626,7 +644,7 @@ static void __intel_cqm_pick_and_rotate(struct perf_event *next)
|
||||
static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
|
||||
{
|
||||
struct perf_event *group, *g;
|
||||
unsigned int rmid;
|
||||
u32 rmid;
|
||||
|
||||
lockdep_assert_held(&cache_mutex);
|
||||
|
||||
@@ -828,8 +846,8 @@ static void intel_cqm_setup_event(struct perf_event *event,
|
||||
struct perf_event **group)
|
||||
{
|
||||
struct perf_event *iter;
|
||||
unsigned int rmid;
|
||||
bool conflict = false;
|
||||
u32 rmid;
|
||||
|
||||
list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
|
||||
rmid = iter->hw.cqm_rmid;
|
||||
@@ -860,7 +878,7 @@ static void intel_cqm_setup_event(struct perf_event *event,
|
||||
static void intel_cqm_event_read(struct perf_event *event)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned int rmid;
|
||||
u32 rmid;
|
||||
u64 val;
|
||||
|
||||
/*
|
||||
@@ -961,55 +979,48 @@ out:
|
||||
|
||||
static void intel_cqm_event_start(struct perf_event *event, int mode)
|
||||
{
|
||||
struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
|
||||
unsigned int rmid = event->hw.cqm_rmid;
|
||||
unsigned long flags;
|
||||
struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
|
||||
u32 rmid = event->hw.cqm_rmid;
|
||||
|
||||
if (!(event->hw.cqm_state & PERF_HES_STOPPED))
|
||||
return;
|
||||
|
||||
event->hw.cqm_state &= ~PERF_HES_STOPPED;
|
||||
|
||||
raw_spin_lock_irqsave(&state->lock, flags);
|
||||
|
||||
if (state->cnt++)
|
||||
WARN_ON_ONCE(state->rmid != rmid);
|
||||
else
|
||||
if (state->rmid_usecnt++) {
|
||||
if (!WARN_ON_ONCE(state->rmid != rmid))
|
||||
return;
|
||||
} else {
|
||||
WARN_ON_ONCE(state->rmid);
|
||||
}
|
||||
|
||||
state->rmid = rmid;
|
||||
wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
|
||||
|
||||
raw_spin_unlock_irqrestore(&state->lock, flags);
|
||||
wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
|
||||
}
|
||||
|
||||
static void intel_cqm_event_stop(struct perf_event *event, int mode)
|
||||
{
|
||||
struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
|
||||
unsigned long flags;
|
||||
struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
|
||||
|
||||
if (event->hw.cqm_state & PERF_HES_STOPPED)
|
||||
return;
|
||||
|
||||
event->hw.cqm_state |= PERF_HES_STOPPED;
|
||||
|
||||
raw_spin_lock_irqsave(&state->lock, flags);
|
||||
intel_cqm_event_read(event);
|
||||
|
||||
if (!--state->cnt) {
|
||||
if (!--state->rmid_usecnt) {
|
||||
state->rmid = 0;
|
||||
wrmsrl(MSR_IA32_PQR_ASSOC, 0);
|
||||
wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
|
||||
} else {
|
||||
WARN_ON_ONCE(!state->rmid);
|
||||
}
|
||||
|
||||
raw_spin_unlock_irqrestore(&state->lock, flags);
|
||||
}
|
||||
|
||||
static int intel_cqm_event_add(struct perf_event *event, int mode)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned int rmid;
|
||||
u32 rmid;
|
||||
|
||||
raw_spin_lock_irqsave(&cache_lock, flags);
|
||||
|
||||
@@ -1024,11 +1035,6 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void intel_cqm_event_del(struct perf_event *event, int mode)
|
||||
{
|
||||
intel_cqm_event_stop(event, mode);
|
||||
}
|
||||
|
||||
static void intel_cqm_event_destroy(struct perf_event *event)
|
||||
{
|
||||
struct perf_event *group_other = NULL;
|
||||
@@ -1057,7 +1063,7 @@ static void intel_cqm_event_destroy(struct perf_event *event)
|
||||
list_replace(&event->hw.cqm_groups_entry,
|
||||
&group_other->hw.cqm_groups_entry);
|
||||
} else {
|
||||
unsigned int rmid = event->hw.cqm_rmid;
|
||||
u32 rmid = event->hw.cqm_rmid;
|
||||
|
||||
if (__rmid_valid(rmid))
|
||||
__put_rmid(rmid);
|
||||
@@ -1221,7 +1227,7 @@ static struct pmu intel_cqm_pmu = {
|
||||
.task_ctx_nr = perf_sw_context,
|
||||
.event_init = intel_cqm_event_init,
|
||||
.add = intel_cqm_event_add,
|
||||
.del = intel_cqm_event_del,
|
||||
.del = intel_cqm_event_stop,
|
||||
.start = intel_cqm_event_start,
|
||||
.stop = intel_cqm_event_stop,
|
||||
.read = intel_cqm_event_read,
|
||||
@@ -1243,12 +1249,12 @@ static inline void cqm_pick_event_reader(int cpu)
|
||||
|
||||
static void intel_cqm_cpu_prepare(unsigned int cpu)
|
||||
{
|
||||
struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
|
||||
struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
|
||||
raw_spin_lock_init(&state->lock);
|
||||
state->rmid = 0;
|
||||
state->cnt = 0;
|
||||
state->closid = 0;
|
||||
state->rmid_usecnt = 0;
|
||||
|
||||
WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
|
||||
WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
|
||||
|
@@ -11,7 +11,7 @@
|
||||
#define BTS_RECORD_SIZE 24
|
||||
|
||||
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
|
||||
#define PEBS_BUFFER_SIZE PAGE_SIZE
|
||||
#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
|
||||
#define PEBS_FIXUP_SIZE PAGE_SIZE
|
||||
|
||||
/*
|
||||
@@ -250,7 +250,7 @@ static int alloc_pebs_buffer(int cpu)
|
||||
{
|
||||
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
||||
int node = cpu_to_node(cpu);
|
||||
int max, thresh = 1; /* always use a single PEBS record */
|
||||
int max;
|
||||
void *buffer, *ibuffer;
|
||||
|
||||
if (!x86_pmu.pebs)
|
||||
@@ -280,9 +280,6 @@ static int alloc_pebs_buffer(int cpu)
|
||||
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
|
||||
max * x86_pmu.pebs_record_size;
|
||||
|
||||
ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
|
||||
thresh * x86_pmu.pebs_record_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -549,6 +546,19 @@ int intel_pmu_drain_bts_buffer(void)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void intel_pmu_drain_pebs_buffer(void)
|
||||
{
|
||||
struct pt_regs regs;
|
||||
|
||||
x86_pmu.drain_pebs(®s);
|
||||
}
|
||||
|
||||
void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
|
||||
{
|
||||
if (!sched_in)
|
||||
intel_pmu_drain_pebs_buffer();
|
||||
}
|
||||
|
||||
/*
|
||||
* PEBS
|
||||
*/
|
||||
@@ -684,25 +694,66 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
|
||||
return &emptyconstraint;
|
||||
}
|
||||
|
||||
static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
|
||||
{
|
||||
return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
|
||||
}
|
||||
|
||||
void intel_pmu_pebs_enable(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct debug_store *ds = cpuc->ds;
|
||||
bool first_pebs;
|
||||
u64 threshold;
|
||||
|
||||
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
|
||||
|
||||
first_pebs = !pebs_is_enabled(cpuc);
|
||||
cpuc->pebs_enabled |= 1ULL << hwc->idx;
|
||||
|
||||
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
|
||||
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
|
||||
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
|
||||
cpuc->pebs_enabled |= 1ULL << 63;
|
||||
|
||||
/*
|
||||
* When the event is constrained enough we can use a larger
|
||||
* threshold and run the event with less frequent PMI.
|
||||
*/
|
||||
if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
|
||||
threshold = ds->pebs_absolute_maximum -
|
||||
x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
|
||||
|
||||
if (first_pebs)
|
||||
perf_sched_cb_inc(event->ctx->pmu);
|
||||
} else {
|
||||
threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
|
||||
|
||||
/*
|
||||
* If not all events can use larger buffer,
|
||||
* roll back to threshold = 1
|
||||
*/
|
||||
if (!first_pebs &&
|
||||
(ds->pebs_interrupt_threshold > threshold))
|
||||
perf_sched_cb_dec(event->ctx->pmu);
|
||||
}
|
||||
|
||||
/* Use auto-reload if possible to save a MSR write in the PMI */
|
||||
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
|
||||
ds->pebs_event_reset[hwc->idx] =
|
||||
(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
|
||||
}
|
||||
|
||||
if (first_pebs || ds->pebs_interrupt_threshold > threshold)
|
||||
ds->pebs_interrupt_threshold = threshold;
|
||||
}
|
||||
|
||||
void intel_pmu_pebs_disable(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct debug_store *ds = cpuc->ds;
|
||||
|
||||
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
|
||||
|
||||
@@ -711,6 +762,13 @@ void intel_pmu_pebs_disable(struct perf_event *event)
|
||||
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
|
||||
cpuc->pebs_enabled &= ~(1ULL << 63);
|
||||
|
||||
if (ds->pebs_interrupt_threshold >
|
||||
ds->pebs_buffer_base + x86_pmu.pebs_record_size) {
|
||||
intel_pmu_drain_pebs_buffer();
|
||||
if (!pebs_is_enabled(cpuc))
|
||||
perf_sched_cb_dec(event->ctx->pmu);
|
||||
}
|
||||
|
||||
if (cpuc->enabled)
|
||||
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
|
||||
|
||||
@@ -846,8 +904,10 @@ static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
|
||||
return txn;
|
||||
}
|
||||
|
||||
static void __intel_pmu_pebs_event(struct perf_event *event,
|
||||
struct pt_regs *iregs, void *__pebs)
|
||||
static void setup_pebs_sample_data(struct perf_event *event,
|
||||
struct pt_regs *iregs, void *__pebs,
|
||||
struct perf_sample_data *data,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
#define PERF_X86_EVENT_PEBS_HSW_PREC \
|
||||
(PERF_X86_EVENT_PEBS_ST_HSW | \
|
||||
@@ -859,13 +919,11 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
||||
*/
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct pebs_record_hsw *pebs = __pebs;
|
||||
struct perf_sample_data data;
|
||||
struct pt_regs regs;
|
||||
u64 sample_type;
|
||||
int fll, fst, dsrc;
|
||||
int fl = event->hw.flags;
|
||||
|
||||
if (!intel_pmu_save_and_restart(event))
|
||||
if (pebs == NULL)
|
||||
return;
|
||||
|
||||
sample_type = event->attr.sample_type;
|
||||
@@ -874,15 +932,15 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
||||
fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
|
||||
fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
|
||||
|
||||
perf_sample_data_init(&data, 0, event->hw.last_period);
|
||||
perf_sample_data_init(data, 0, event->hw.last_period);
|
||||
|
||||
data.period = event->hw.last_period;
|
||||
data->period = event->hw.last_period;
|
||||
|
||||
/*
|
||||
* Use latency for weight (only avail with PEBS-LL)
|
||||
*/
|
||||
if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
|
||||
data.weight = pebs->lat;
|
||||
data->weight = pebs->lat;
|
||||
|
||||
/*
|
||||
* data.data_src encodes the data source
|
||||
@@ -895,7 +953,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
||||
val = precise_datala_hsw(event, pebs->dse);
|
||||
else if (fst)
|
||||
val = precise_store_data(pebs->dse);
|
||||
data.data_src.val = val;
|
||||
data->data_src.val = val;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -908,61 +966,123 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
||||
* PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
|
||||
* A possible PERF_SAMPLE_REGS will have to transfer all regs.
|
||||
*/
|
||||
regs = *iregs;
|
||||
regs.flags = pebs->flags;
|
||||
set_linear_ip(®s, pebs->ip);
|
||||
regs.bp = pebs->bp;
|
||||
regs.sp = pebs->sp;
|
||||
*regs = *iregs;
|
||||
regs->flags = pebs->flags;
|
||||
set_linear_ip(regs, pebs->ip);
|
||||
regs->bp = pebs->bp;
|
||||
regs->sp = pebs->sp;
|
||||
|
||||
if (sample_type & PERF_SAMPLE_REGS_INTR) {
|
||||
regs.ax = pebs->ax;
|
||||
regs.bx = pebs->bx;
|
||||
regs.cx = pebs->cx;
|
||||
regs.dx = pebs->dx;
|
||||
regs.si = pebs->si;
|
||||
regs.di = pebs->di;
|
||||
regs.bp = pebs->bp;
|
||||
regs.sp = pebs->sp;
|
||||
regs->ax = pebs->ax;
|
||||
regs->bx = pebs->bx;
|
||||
regs->cx = pebs->cx;
|
||||
regs->dx = pebs->dx;
|
||||
regs->si = pebs->si;
|
||||
regs->di = pebs->di;
|
||||
regs->bp = pebs->bp;
|
||||
regs->sp = pebs->sp;
|
||||
|
||||
regs.flags = pebs->flags;
|
||||
regs->flags = pebs->flags;
|
||||
#ifndef CONFIG_X86_32
|
||||
regs.r8 = pebs->r8;
|
||||
regs.r9 = pebs->r9;
|
||||
regs.r10 = pebs->r10;
|
||||
regs.r11 = pebs->r11;
|
||||
regs.r12 = pebs->r12;
|
||||
regs.r13 = pebs->r13;
|
||||
regs.r14 = pebs->r14;
|
||||
regs.r15 = pebs->r15;
|
||||
regs->r8 = pebs->r8;
|
||||
regs->r9 = pebs->r9;
|
||||
regs->r10 = pebs->r10;
|
||||
regs->r11 = pebs->r11;
|
||||
regs->r12 = pebs->r12;
|
||||
regs->r13 = pebs->r13;
|
||||
regs->r14 = pebs->r14;
|
||||
regs->r15 = pebs->r15;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
|
||||
regs.ip = pebs->real_ip;
|
||||
regs.flags |= PERF_EFLAGS_EXACT;
|
||||
} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s))
|
||||
regs.flags |= PERF_EFLAGS_EXACT;
|
||||
regs->ip = pebs->real_ip;
|
||||
regs->flags |= PERF_EFLAGS_EXACT;
|
||||
} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
|
||||
regs->flags |= PERF_EFLAGS_EXACT;
|
||||
else
|
||||
regs.flags &= ~PERF_EFLAGS_EXACT;
|
||||
regs->flags &= ~PERF_EFLAGS_EXACT;
|
||||
|
||||
if ((sample_type & PERF_SAMPLE_ADDR) &&
|
||||
x86_pmu.intel_cap.pebs_format >= 1)
|
||||
data.addr = pebs->dla;
|
||||
data->addr = pebs->dla;
|
||||
|
||||
if (x86_pmu.intel_cap.pebs_format >= 2) {
|
||||
/* Only set the TSX weight when no memory weight. */
|
||||
if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
|
||||
data.weight = intel_hsw_weight(pebs);
|
||||
data->weight = intel_hsw_weight(pebs);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_TRANSACTION)
|
||||
data.txn = intel_hsw_transaction(pebs);
|
||||
data->txn = intel_hsw_transaction(pebs);
|
||||
}
|
||||
|
||||
if (has_branch_stack(event))
|
||||
data.br_stack = &cpuc->lbr_stack;
|
||||
data->br_stack = &cpuc->lbr_stack;
|
||||
}
|
||||
|
||||
if (perf_event_overflow(event, &data, ®s))
|
||||
static inline void *
|
||||
get_next_pebs_record_by_bit(void *base, void *top, int bit)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
void *at;
|
||||
u64 pebs_status;
|
||||
|
||||
if (base == NULL)
|
||||
return NULL;
|
||||
|
||||
for (at = base; at < top; at += x86_pmu.pebs_record_size) {
|
||||
struct pebs_record_nhm *p = at;
|
||||
|
||||
if (test_bit(bit, (unsigned long *)&p->status)) {
|
||||
/* PEBS v3 has accurate status bits */
|
||||
if (x86_pmu.intel_cap.pebs_format >= 3)
|
||||
return at;
|
||||
|
||||
if (p->status == (1 << bit))
|
||||
return at;
|
||||
|
||||
/* clear non-PEBS bit and re-check */
|
||||
pebs_status = p->status & cpuc->pebs_enabled;
|
||||
pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
|
||||
if (pebs_status == (1 << bit))
|
||||
return at;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void __intel_pmu_pebs_event(struct perf_event *event,
|
||||
struct pt_regs *iregs,
|
||||
void *base, void *top,
|
||||
int bit, int count)
|
||||
{
|
||||
struct perf_sample_data data;
|
||||
struct pt_regs regs;
|
||||
void *at = get_next_pebs_record_by_bit(base, top, bit);
|
||||
|
||||
if (!intel_pmu_save_and_restart(event) &&
|
||||
!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
|
||||
return;
|
||||
|
||||
while (count > 1) {
|
||||
setup_pebs_sample_data(event, iregs, at, &data, ®s);
|
||||
perf_event_output(event, &data, ®s);
|
||||
at += x86_pmu.pebs_record_size;
|
||||
at = get_next_pebs_record_by_bit(at, top, bit);
|
||||
count--;
|
||||
}
|
||||
|
||||
setup_pebs_sample_data(event, iregs, at, &data, ®s);
|
||||
|
||||
/*
|
||||
* All but the last records are processed.
|
||||
* The last one is left to be able to call the overflow handler.
|
||||
*/
|
||||
if (perf_event_overflow(event, &data, ®s)) {
|
||||
x86_pmu_stop(event, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
|
||||
@@ -992,72 +1112,99 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
|
||||
if (!event->attr.precise_ip)
|
||||
return;
|
||||
|
||||
n = top - at;
|
||||
n = (top - at) / x86_pmu.pebs_record_size;
|
||||
if (n <= 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Should not happen, we program the threshold at 1 and do not
|
||||
* set a reset value.
|
||||
*/
|
||||
WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
|
||||
at += n - 1;
|
||||
|
||||
__intel_pmu_pebs_event(event, iregs, at);
|
||||
__intel_pmu_pebs_event(event, iregs, at, top, 0, n);
|
||||
}
|
||||
|
||||
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct debug_store *ds = cpuc->ds;
|
||||
struct perf_event *event = NULL;
|
||||
void *at, *top;
|
||||
u64 status = 0;
|
||||
int bit;
|
||||
struct perf_event *event;
|
||||
void *base, *at, *top;
|
||||
short counts[MAX_PEBS_EVENTS] = {};
|
||||
short error[MAX_PEBS_EVENTS] = {};
|
||||
int bit, i;
|
||||
|
||||
if (!x86_pmu.pebs_active)
|
||||
return;
|
||||
|
||||
at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
|
||||
base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
|
||||
top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
|
||||
|
||||
ds->pebs_index = ds->pebs_buffer_base;
|
||||
|
||||
if (unlikely(at > top))
|
||||
if (unlikely(base >= top))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Should not happen, we program the threshold at 1 and do not
|
||||
* set a reset value.
|
||||
*/
|
||||
WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
|
||||
"Unexpected number of pebs records %ld\n",
|
||||
(long)(top - at) / x86_pmu.pebs_record_size);
|
||||
|
||||
for (; at < top; at += x86_pmu.pebs_record_size) {
|
||||
for (at = base; at < top; at += x86_pmu.pebs_record_size) {
|
||||
struct pebs_record_nhm *p = at;
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&p->status,
|
||||
x86_pmu.max_pebs_events) {
|
||||
event = cpuc->events[bit];
|
||||
if (!test_bit(bit, cpuc->active_mask))
|
||||
continue;
|
||||
/* PEBS v3 has accurate status bits */
|
||||
if (x86_pmu.intel_cap.pebs_format >= 3) {
|
||||
for_each_set_bit(bit, (unsigned long *)&p->status,
|
||||
MAX_PEBS_EVENTS)
|
||||
counts[bit]++;
|
||||
|
||||
WARN_ON_ONCE(!event);
|
||||
|
||||
if (!event->attr.precise_ip)
|
||||
continue;
|
||||
|
||||
if (__test_and_set_bit(bit, (unsigned long *)&status))
|
||||
continue;
|
||||
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!event || bit >= x86_pmu.max_pebs_events)
|
||||
bit = find_first_bit((unsigned long *)&p->status,
|
||||
x86_pmu.max_pebs_events);
|
||||
if (bit >= x86_pmu.max_pebs_events)
|
||||
continue;
|
||||
if (!test_bit(bit, cpuc->active_mask))
|
||||
continue;
|
||||
/*
|
||||
* The PEBS hardware does not deal well with the situation
|
||||
* when events happen near to each other and multiple bits
|
||||
* are set. But it should happen rarely.
|
||||
*
|
||||
* If these events include one PEBS and multiple non-PEBS
|
||||
* events, it doesn't impact PEBS record. The record will
|
||||
* be handled normally. (slow path)
|
||||
*
|
||||
* If these events include two or more PEBS events, the
|
||||
* records for the events can be collapsed into a single
|
||||
* one, and it's not possible to reconstruct all events
|
||||
* that caused the PEBS record. It's called collision.
|
||||
* If collision happened, the record will be dropped.
|
||||
*
|
||||
*/
|
||||
if (p->status != (1 << bit)) {
|
||||
u64 pebs_status;
|
||||
|
||||
__intel_pmu_pebs_event(event, iregs, at);
|
||||
/* slow path */
|
||||
pebs_status = p->status & cpuc->pebs_enabled;
|
||||
pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
|
||||
if (pebs_status != (1 << bit)) {
|
||||
for_each_set_bit(i, (unsigned long *)&pebs_status,
|
||||
MAX_PEBS_EVENTS)
|
||||
error[i]++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
counts[bit]++;
|
||||
}
|
||||
|
||||
for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
|
||||
if ((counts[bit] == 0) && (error[bit] == 0))
|
||||
continue;
|
||||
event = cpuc->events[bit];
|
||||
WARN_ON_ONCE(!event);
|
||||
WARN_ON_ONCE(!event->attr.precise_ip);
|
||||
|
||||
/* log dropped samples number */
|
||||
if (error[bit])
|
||||
perf_log_lost_samples(event, error[bit]);
|
||||
|
||||
if (counts[bit]) {
|
||||
__intel_pmu_pebs_event(event, iregs, base,
|
||||
top, bit, counts[bit]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -96,6 +96,7 @@ enum {
|
||||
X86_BR_NO_TX = 1 << 14,/* not in transaction */
|
||||
X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
|
||||
X86_BR_CALL_STACK = 1 << 16,/* call stack */
|
||||
X86_BR_IND_JMP = 1 << 17,/* indirect jump */
|
||||
};
|
||||
|
||||
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
|
||||
@@ -113,6 +114,7 @@ enum {
|
||||
X86_BR_IRQ |\
|
||||
X86_BR_ABORT |\
|
||||
X86_BR_IND_CALL |\
|
||||
X86_BR_IND_JMP |\
|
||||
X86_BR_ZERO_CALL)
|
||||
|
||||
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
|
||||
@@ -262,9 +264,6 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct x86_perf_task_context *task_ctx;
|
||||
|
||||
if (!x86_pmu.lbr_nr)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If LBR callstack feature is enabled and the stack was saved when
|
||||
* the task was scheduled out, restore the stack. Otherwise flush
|
||||
@@ -523,6 +522,9 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
|
||||
X86_BR_CALL_STACK;
|
||||
}
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
|
||||
mask |= X86_BR_IND_JMP;
|
||||
|
||||
/*
|
||||
* stash actual user request into reg, it may
|
||||
* be used by fixup code for some CPU
|
||||
@@ -736,7 +738,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
|
||||
break;
|
||||
case 4:
|
||||
case 5:
|
||||
ret = X86_BR_JMP;
|
||||
ret = X86_BR_IND_JMP;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
@@ -844,6 +846,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
|
||||
*/
|
||||
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
|
||||
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
|
||||
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
|
||||
};
|
||||
|
||||
static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
|
||||
@@ -856,6 +859,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
|
||||
| LBR_FAR,
|
||||
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
|
||||
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
|
||||
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
|
||||
};
|
||||
|
||||
static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
|
||||
@@ -870,6 +874,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
|
||||
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
|
||||
[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
|
||||
| LBR_RETURN | LBR_CALL_STACK,
|
||||
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
|
||||
};
|
||||
|
||||
/* core */
|
||||
|
@@ -187,15 +187,6 @@ static bool pt_event_valid(struct perf_event *event)
|
||||
* These all are cpu affine and operate on a local PT
|
||||
*/
|
||||
|
||||
static bool pt_is_running(void)
|
||||
{
|
||||
u64 ctl;
|
||||
|
||||
rdmsrl(MSR_IA32_RTIT_CTL, ctl);
|
||||
|
||||
return !!(ctl & RTIT_CTL_TRACEEN);
|
||||
}
|
||||
|
||||
static void pt_config(struct perf_event *event)
|
||||
{
|
||||
u64 reg;
|
||||
@@ -609,7 +600,12 @@ static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
|
||||
* @handle: Current output handle.
|
||||
*
|
||||
* Place INT and STOP marks to prevent overwriting old data that the consumer
|
||||
* hasn't yet collected.
|
||||
* hasn't yet collected and waking up the consumer after a certain fraction of
|
||||
* the buffer has filled up. Only needed and sensible for non-snapshot counters.
|
||||
*
|
||||
* This obviously relies on buf::head to figure out buffer markers, so it has
|
||||
* to be called after pt_buffer_reset_offsets() and before the hardware tracing
|
||||
* is enabled.
|
||||
*/
|
||||
static int pt_buffer_reset_markers(struct pt_buffer *buf,
|
||||
struct perf_output_handle *handle)
|
||||
@@ -618,9 +614,6 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
|
||||
unsigned long head = local64_read(&buf->head);
|
||||
unsigned long idx, npages, wakeup;
|
||||
|
||||
if (buf->snapshot)
|
||||
return 0;
|
||||
|
||||
/* can't stop in the middle of an output region */
|
||||
if (buf->output_off + handle->size + 1 <
|
||||
sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
|
||||
@@ -674,7 +667,7 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
|
||||
struct topa *cur = buf->first, *prev = buf->last;
|
||||
struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
|
||||
*te_prev = TOPA_ENTRY(prev, prev->last - 1);
|
||||
int pg = 0, idx = 0, ntopa = 0;
|
||||
int pg = 0, idx = 0;
|
||||
|
||||
while (pg < buf->nr_pages) {
|
||||
int tidx;
|
||||
@@ -689,9 +682,9 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
|
||||
/* advance to next topa table */
|
||||
idx = 0;
|
||||
cur = list_entry(cur->list.next, struct topa, list);
|
||||
ntopa++;
|
||||
} else
|
||||
} else {
|
||||
idx++;
|
||||
}
|
||||
te_cur = TOPA_ENTRY(cur, idx);
|
||||
}
|
||||
|
||||
@@ -703,7 +696,14 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
|
||||
* @head: Write pointer (aux_head) from AUX buffer.
|
||||
*
|
||||
* Find the ToPA table and entry corresponding to given @head and set buffer's
|
||||
* "current" pointers accordingly.
|
||||
* "current" pointers accordingly. This is done after we have obtained the
|
||||
* current aux_head position from a successful call to perf_aux_output_begin()
|
||||
* to make sure the hardware is writing to the right place.
|
||||
*
|
||||
* This function modifies buf::{cur,cur_idx,output_off} that will be programmed
|
||||
* into PT msrs when the tracing is enabled and buf::head and buf::data_size,
|
||||
* which are used to determine INT and STOP markers' locations by a subsequent
|
||||
* call to pt_buffer_reset_markers().
|
||||
*/
|
||||
static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
|
||||
{
|
||||
@@ -901,6 +901,7 @@ void intel_pt_interrupt(void)
|
||||
}
|
||||
|
||||
pt_buffer_reset_offsets(buf, pt->handle.head);
|
||||
/* snapshot counters don't use PMI, so it's safe */
|
||||
ret = pt_buffer_reset_markers(buf, &pt->handle);
|
||||
if (ret) {
|
||||
perf_aux_output_end(&pt->handle, 0, true);
|
||||
@@ -923,7 +924,7 @@ static void pt_event_start(struct perf_event *event, int mode)
|
||||
struct pt *pt = this_cpu_ptr(&pt_ctx);
|
||||
struct pt_buffer *buf = perf_get_aux(&pt->handle);
|
||||
|
||||
if (pt_is_running() || !buf || pt_buffer_is_full(buf, pt)) {
|
||||
if (!buf || pt_buffer_is_full(buf, pt)) {
|
||||
event->hw.state = PERF_HES_STOPPED;
|
||||
return;
|
||||
}
|
||||
@@ -954,7 +955,6 @@ static void pt_event_stop(struct perf_event *event, int mode)
|
||||
event->hw.state = PERF_HES_STOPPED;
|
||||
|
||||
if (mode & PERF_EF_UPDATE) {
|
||||
struct pt *pt = this_cpu_ptr(&pt_ctx);
|
||||
struct pt_buffer *buf = perf_get_aux(&pt->handle);
|
||||
|
||||
if (!buf)
|
||||
|
@@ -922,6 +922,9 @@ static int __init uncore_pci_init(void)
|
||||
case 69: /* Haswell Celeron */
|
||||
ret = hsw_uncore_pci_init();
|
||||
break;
|
||||
case 61: /* Broadwell */
|
||||
ret = bdw_uncore_pci_init();
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
@@ -325,6 +325,7 @@ extern struct event_constraint uncore_constraint_empty;
|
||||
int snb_uncore_pci_init(void);
|
||||
int ivb_uncore_pci_init(void);
|
||||
int hsw_uncore_pci_init(void);
|
||||
int bdw_uncore_pci_init(void);
|
||||
void snb_uncore_cpu_init(void);
|
||||
void nhm_uncore_cpu_init(void);
|
||||
|
||||
|
@@ -7,6 +7,7 @@
|
||||
#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
|
||||
#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
|
||||
#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
|
||||
#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
|
||||
|
||||
/* SNB event control */
|
||||
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
|
||||
@@ -486,6 +487,14 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static const struct pci_device_id bdw_uncore_pci_ids[] = {
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static struct pci_driver snb_uncore_pci_driver = {
|
||||
.name = "snb_uncore",
|
||||
.id_table = snb_uncore_pci_ids,
|
||||
@@ -501,6 +510,11 @@ static struct pci_driver hsw_uncore_pci_driver = {
|
||||
.id_table = hsw_uncore_pci_ids,
|
||||
};
|
||||
|
||||
static struct pci_driver bdw_uncore_pci_driver = {
|
||||
.name = "bdw_uncore",
|
||||
.id_table = bdw_uncore_pci_ids,
|
||||
};
|
||||
|
||||
struct imc_uncore_pci_dev {
|
||||
__u32 pci_id;
|
||||
struct pci_driver *driver;
|
||||
@@ -514,6 +528,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
|
||||
IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
|
||||
IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */
|
||||
IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
|
||||
IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */
|
||||
{ /* end marker */ }
|
||||
};
|
||||
|
||||
@@ -561,6 +576,11 @@ int hsw_uncore_pci_init(void)
|
||||
return imc_uncore_pci_init();
|
||||
}
|
||||
|
||||
int bdw_uncore_pci_init(void)
|
||||
{
|
||||
return imc_uncore_pci_init();
|
||||
}
|
||||
|
||||
/* end of Sandy Bridge uncore support */
|
||||
|
||||
/* Nehalem uncore support */
|
||||
|