Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf changes from Ingo Molnar:
 "Main changes:

  Kernel side changes:

   - Add SNB/IVB/HSW client uncore memory controller support (Stephane Eranian)

   - Fix various x86/P4 PMU driver bugs (Don Zickus)

  Tooling, user visible changes:

   - Add several futex 'perf bench' microbenchmarks (Davidlohr Bueso)

   - Speed up thread map generation (Don Zickus)

   - Introduce 'perf kvm --list-cmds' command line option for use by scripts (Ramkumar Ramachandra)

   - Print the evsel name in the annotate stdio output, prep to fix support outputting annotation for multiple events, not just for the first one (Arnaldo Carvalho de Melo)

   - Allow setting preferred callchain method in .perfconfig (Jiri Olsa)

   - Show in what binaries/modules 'perf probe's are set (Masami Hiramatsu)

   - Support distro-style debuginfo for uprobe in 'perf probe' (Masami Hiramatsu)

  Tooling, internal changes and fixes:

   - Use tid in mmap/mmap2 events to find maps (Don Zickus)

   - Record the reason for filtering an address_location (Namhyung Kim)

   - Apply all filters to an addr_location (Namhyung Kim)

   - Merge al->filtered with hist_entry->filtered in report/hists (Namhyung Kim)

   - Fix memory leak when synthesizing thread records (Namhyung Kim)

   - Use ui__has_annotation() in 'report' (Namhyung Kim)

   - hists browser refactorings to reuse code across UIs (Namhyung Kim)

   - Add support for the new DWARF unwinder library in elfutils (Jiri Olsa)

   - Fix build race in the generation of bison files (Jiri Olsa)

   - Further streamline the feature detection display, trimming it a bit to show just the libraries detected; using VF=1 gets a more verbose output, showing the less interesting feature checks as well (Jiri Olsa)

   - Check compatible symtab type before loading dso (Namhyung Kim)

   - Check return value of filename__read_debuglink() (Stephane Eranian)

   - Move some hashing and fs related code from tools/perf/util/ to tools/lib/ so that it can be used by more tools/ living utilities (Borislav Petkov)

   - Prepare DWARF unwinding code for using an elfutils alternative unwinding library (Jiri Olsa)

   - Fix DWARF unwind max_stack processing (Jiri Olsa)

   - Add dwarf unwind 'perf test' entry (Jiri Olsa)

   - 'perf probe' improvements including memory leak fixes, sharing the intlist class with other tools, uprobes/kprobes code sharing and use of ref_reloc_sym (Masami Hiramatsu)

   - Shorten sample symbol resolving by adding cpumode to struct addr_location (Arnaldo Carvalho de Melo)

   - Fix synthesizing mmaps for threads (Don Zickus)

   - Fix invalid output on event group stdio report (Namhyung Kim)

   - Fixup header alignment in 'perf sched latency' output (Ramkumar Ramachandra)

   - Fix off-by-one error in 'perf timechart record' argv handling (Ramkumar Ramachandra)

  Tooling, cleanups:

   - Remove unused thread__find_map function (Jiri Olsa)

   - Remove unused simple_strtoul() function (Ramkumar Ramachandra)

  Tooling, documentation updates:

   - Update function names in debug messages (Ramkumar Ramachandra)

   - Update some code references in design.txt (Ramkumar Ramachandra)

   - Clarify load-latency information in the 'perf mem' docs (Andi Kleen)

   - Clarify x86 register naming in 'perf probe' docs (Andi Kleen)"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (96 commits)
  perf tools: Remove unused simple_strtoul() function
  perf tools: Update some code references in design.txt
  perf evsel: Update function names in debug messages
  perf tools: Remove thread__find_map function
  perf annotate: Print the evsel name in the stdio output
  perf report: Use ui__has_annotation()
  perf tools: Fix memory leak when synthesizing thread records
  perf tools: Use tid in mmap/mmap2 events to find maps
  perf report: Merge al->filtered with hist_entry->filtered
  perf symbols: Apply all filters to an addr_location
  perf symbols: Record the reason for filtering an address_location
  perf sched: Fixup header alignment in 'latency' output
  perf timechart: Fix off-by-one error in 'record' argv handling
  perf machine: Factor machine__find_thread to take tid argument
  perf tools: Speed up thread map generation
  perf kvm: introduce --list-cmds for use by scripts
  perf ui hists: Pass evsel to hpp->header/width functions explicitly
  perf symbols: Introduce thread__find_cpumode_addr_location
  perf session: Change header.misc dump from decimal to hex
  perf ui/tui: Reuse generic __hpp__fmt() code
  ...
@@ -892,7 +892,6 @@ static void x86_pmu_enable(struct pmu *pmu)
* hw_perf_group_sched_in() or x86_pmu_enable()
*
* step1: save events moving to new counters
* step2: reprogram moved events into new counters
*/
for (i = 0; i < n_running; i++) {
event = cpuc->event_list[i];
@@ -918,6 +917,9 @@ static void x86_pmu_enable(struct pmu *pmu)
x86_pmu_stop(event, PERF_EF_UPDATE);
}

/*
* step2: reprogram moved events into new counters
*/
for (i = 0; i < cpuc->n_events; i++) {
event = cpuc->event_list[i];
hwc = &event->hw;
@@ -1043,7 +1045,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be performed
* at commit time (->commit_txn) as a whole
* at commit time (->commit_txn) as a whole.
*/
if (cpuc->group_flag & PERF_EVENT_TXN)
goto done_collect;
@@ -1058,6 +1060,10 @@ static int x86_pmu_add(struct perf_event *event, int flags)
memcpy(cpuc->assign, assign, n*sizeof(int));

done_collect:
/*
* Commit the collect_events() state. See x86_pmu_del() and
* x86_pmu_*_txn().
*/
cpuc->n_events = n;
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;
@@ -1183,28 +1189,38 @@ static void x86_pmu_del(struct perf_event *event, int flags)
* If we're called during a txn, we don't need to do anything.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
*
* XXX assumes any ->del() called during a TXN will only be on
* an event added during that same TXN.
*/
if (cpuc->group_flag & PERF_EVENT_TXN)
return;

/*
* Not a TXN, therefore cleanup properly.
*/
x86_pmu_stop(event, PERF_EF_UPDATE);

for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event_list[i]) {

if (i >= cpuc->n_events - cpuc->n_added)
--cpuc->n_added;

if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(cpuc, event);

while (++i < cpuc->n_events)
cpuc->event_list[i-1] = cpuc->event_list[i];

--cpuc->n_events;
if (event == cpuc->event_list[i])
break;
}
}

if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
return;

/* If we have a newly added event; make sure to decrease n_added. */
if (i >= cpuc->n_events - cpuc->n_added)
--cpuc->n_added;

if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(cpuc, event);

/* Delete the array entry. */
while (++i < cpuc->n_events)
cpuc->event_list[i-1] = cpuc->event_list[i];
--cpuc->n_events;

perf_event_update_userpage(event);
}

@@ -1598,7 +1614,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
{
__this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
/*
* Truncate the collected events.
* Truncate collected array by the number of events added in this
* transaction. See x86_pmu_add() and x86_pmu_*_txn().
*/
__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
@@ -1609,6 +1626,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
* Commit group events scheduling transaction
* Perform the group schedulability test as a whole
* Return 0 if success
*
* Does not cancel the transaction on failure; expects the caller to do this.
*/
static int x86_pmu_commit_txn(struct pmu *pmu)
{

@@ -130,9 +130,11 @@ struct cpu_hw_events {
unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
int enabled;

int n_events;
int n_added;
int n_txn;
int n_events; /* the # of events in the below arrays */
int n_added; /* the # last events in the below arrays;
they've never been enabled yet */
int n_txn; /* the # last events in the below arrays;
added in the current transaction */
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
u64 tags[X86_PMC_IDX_MAX];
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */

@@ -66,6 +66,47 @@ DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");

static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
static void uncore_pmu_event_read(struct perf_event *event);

static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}

static struct intel_uncore_box *
uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
struct intel_uncore_box *box;

box = *per_cpu_ptr(pmu->box, cpu);
if (box)
return box;

raw_spin_lock(&uncore_box_lock);
list_for_each_entry(box, &pmu->box_list, list) {
if (box->phys_id == topology_physical_package_id(cpu)) {
atomic_inc(&box->refcnt);
*per_cpu_ptr(pmu->box, cpu) = box;
break;
}
}
raw_spin_unlock(&uncore_box_lock);

return *per_cpu_ptr(pmu->box, cpu);
}

static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
/*
* perf core schedules event on the basis of cpu, uncore events are
* collected by one of the cpus inside a physical package.
*/
return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}

static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
u64 count;
@@ -1639,6 +1680,349 @@ static struct intel_uncore_type *snb_msr_uncores[] = {
&snb_uncore_cbox,
NULL,
};

enum {
SNB_PCI_UNCORE_IMC,
};

static struct uncore_event_desc snb_uncore_imc_events[] = {
INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"),
INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),

INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),

{ /* end: all zeroes */ },
};

#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff
#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48

/* page size multiple covering all config regs */
#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000

#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1
#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050
#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2
#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE

static struct attribute *snb_uncore_imc_formats_attr[] = {
&format_attr_event.attr,
NULL,
};

static struct attribute_group snb_uncore_imc_format_group = {
.name = "format",
.attrs = snb_uncore_imc_formats_attr,
};

static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
resource_size_t addr;
u32 pci_dword;

pci_read_config_dword(pdev, where, &pci_dword);
addr = pci_dword;

#ifdef CONFIG_PHYS_ADDR_T_64BIT
pci_read_config_dword(pdev, where + 4, &pci_dword);
addr |= ((resource_size_t)pci_dword << 32);
#endif

addr &= ~(PAGE_SIZE - 1);

box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
}

static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
{}

static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
{}

static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
{}

static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
{}

static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;

return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
}

/*
* custom event_init() function because we define our own fixed, free
* running counters, so we do not want to conflict with generic uncore
* logic. Also simplifies processing
*/
static int snb_uncore_imc_event_init(struct perf_event *event)
{
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
struct hw_perf_event *hwc = &event->hw;
u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
int idx, base;

if (event->attr.type != event->pmu->type)
return -ENOENT;

pmu = uncore_event_to_pmu(event);
/* no device found for this pmu */
if (pmu->func_id < 0)
return -ENOENT;

/* Sampling not supported yet */
if (hwc->sample_period)
return -EINVAL;

/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest ||
event->attr.sample_period) /* no sampling */
return -EINVAL;

/*
* Place all uncore events for a particular physical package
* onto a single cpu
*/
if (event->cpu < 0)
return -EINVAL;

/* check only supported bits are set */
if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
return -EINVAL;

box = uncore_pmu_to_box(pmu, event->cpu);
if (!box || box->cpu < 0)
return -EINVAL;

event->cpu = box->cpu;

event->hw.idx = -1;
event->hw.last_tag = ~0ULL;
event->hw.extra_reg.idx = EXTRA_REG_NONE;
event->hw.branch_reg.idx = EXTRA_REG_NONE;
/*
* check event is known (whitelist, determines counter)
*/
switch (cfg) {
case SNB_UNCORE_PCI_IMC_DATA_READS:
base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
idx = UNCORE_PMC_IDX_FIXED;
break;
case SNB_UNCORE_PCI_IMC_DATA_WRITES:
base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
idx = UNCORE_PMC_IDX_FIXED + 1;
break;
default:
return -EINVAL;
}

/* must be done before validate_group */
event->hw.event_base = base;
event->hw.config = cfg;
event->hw.idx = idx;

/* no group validation needed, we have free running counters */

return 0;
}

static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
{
return 0;
}

static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
u64 count;

if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
return;

event->hw.state = 0;
box->n_active++;

list_add_tail(&event->active_entry, &box->active_list);

count = snb_uncore_imc_read_counter(box, event);
local64_set(&event->hw.prev_count, count);

if (box->n_active == 1)
uncore_pmu_start_hrtimer(box);
}

static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
struct hw_perf_event *hwc = &event->hw;

if (!(hwc->state & PERF_HES_STOPPED)) {
box->n_active--;

WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;

list_del(&event->active_entry);

if (box->n_active == 0)
uncore_pmu_cancel_hrtimer(box);
}

if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
/*
* Drain the remaining delta count out of a event
* that we are disabling:
*/
uncore_perf_event_update(box, event);
hwc->state |= PERF_HES_UPTODATE;
}
}

static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
struct hw_perf_event *hwc = &event->hw;

if (!box)
return -ENODEV;

hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (!(flags & PERF_EF_START))
hwc->state |= PERF_HES_ARCH;

snb_uncore_imc_event_start(event, 0);

box->n_events++;

return 0;
}

static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
int i;

snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);

for (i = 0; i < box->n_events; i++) {
if (event == box->event_list[i]) {
--box->n_events;
break;
}
}
}

static int snb_pci2phy_map_init(int devid)
{
struct pci_dev *dev = NULL;
int bus;

dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
if (!dev)
return -ENOTTY;

bus = dev->bus->number;

pcibus_to_physid[bus] = 0;

pci_dev_put(dev);

return 0;
}

static struct pmu snb_uncore_imc_pmu = {
.task_ctx_nr = perf_invalid_context,
.event_init = snb_uncore_imc_event_init,
.add = snb_uncore_imc_event_add,
.del = snb_uncore_imc_event_del,
.start = snb_uncore_imc_event_start,
.stop = snb_uncore_imc_event_stop,
.read = uncore_pmu_event_read,
};

static struct intel_uncore_ops snb_uncore_imc_ops = {
.init_box = snb_uncore_imc_init_box,
.enable_box = snb_uncore_imc_enable_box,
.disable_box = snb_uncore_imc_disable_box,
.disable_event = snb_uncore_imc_disable_event,
.enable_event = snb_uncore_imc_enable_event,
.hw_config = snb_uncore_imc_hw_config,
.read_counter = snb_uncore_imc_read_counter,
};

static struct intel_uncore_type snb_uncore_imc = {
.name = "imc",
.num_counters = 2,
.num_boxes = 1,
.fixed_ctr_bits = 32,
.fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE,
.event_descs = snb_uncore_imc_events,
.format_group = &snb_uncore_imc_format_group,
.perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
.event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK,
.ops = &snb_uncore_imc_ops,
.pmu = &snb_uncore_imc_pmu,
};

static struct intel_uncore_type *snb_pci_uncores[] = {
[SNB_PCI_UNCORE_IMC] = &snb_uncore_imc,
NULL,
};

static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = {
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* end: all zeroes */ },
};

static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = {
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* end: all zeroes */ },
};

static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = {
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* end: all zeroes */ },
};

static struct pci_driver snb_uncore_pci_driver = {
.name = "snb_uncore",
.id_table = snb_uncore_pci_ids,
};

static struct pci_driver ivb_uncore_pci_driver = {
.name = "ivb_uncore",
.id_table = ivb_uncore_pci_ids,
};

static struct pci_driver hsw_uncore_pci_driver = {
.name = "hsw_uncore",
.id_table = hsw_uncore_pci_ids,
};

/* end of Sandy Bridge uncore support */

/* Nehalem uncore support */
@@ -2789,6 +3173,7 @@ again:
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
struct intel_uncore_box *box;
struct perf_event *event;
unsigned long flags;
int bit;

@@ -2801,19 +3186,27 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
*/
local_irq_save(flags);

/*
* handle boxes with an active event list as opposed to active
* counters
*/
list_for_each_entry(event, &box->active_list, active_entry) {
uncore_perf_event_update(box, event);
}

for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
uncore_perf_event_update(box, box->events[bit]);

local_irq_restore(flags);

hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL));
hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
return HRTIMER_RESTART;
}

static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
__hrtimer_start_range_ns(&box->hrtimer,
ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
ns_to_ktime(box->hrtimer_duration), 0,
HRTIMER_MODE_REL_PINNED, 0);
}

@@ -2847,45 +3240,14 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
box->cpu = -1;
box->phys_id = -1;

/* set default hrtimer timeout */
box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;

INIT_LIST_HEAD(&box->active_list);

return box;
}

static struct intel_uncore_box *
uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
struct intel_uncore_box *box;

box = *per_cpu_ptr(pmu->box, cpu);
if (box)
return box;

raw_spin_lock(&uncore_box_lock);
list_for_each_entry(box, &pmu->box_list, list) {
if (box->phys_id == topology_physical_package_id(cpu)) {
atomic_inc(&box->refcnt);
*per_cpu_ptr(pmu->box, cpu) = box;
break;
}
}
raw_spin_unlock(&uncore_box_lock);

return *per_cpu_ptr(pmu->box, cpu);
}

static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}

static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
/*
* perf core schedules event on the basis of cpu, uncore events are
* collected by one of the cpus inside a physical package.
*/
return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}

static int
uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
{
@@ -3279,16 +3641,21 @@ static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
int ret;

pmu->pmu = (struct pmu) {
.attr_groups = pmu->type->attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = uncore_pmu_event_init,
.add = uncore_pmu_event_add,
.del = uncore_pmu_event_del,
.start = uncore_pmu_event_start,
.stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read,
};
if (!pmu->type->pmu) {
pmu->pmu = (struct pmu) {
.attr_groups = pmu->type->attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = uncore_pmu_event_init,
.add = uncore_pmu_event_add,
.del = uncore_pmu_event_del,
.start = uncore_pmu_event_start,
.stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read,
};
} else {
pmu->pmu = *pmu->type->pmu;
pmu->pmu.attr_groups = pmu->type->attr_groups;
}

if (pmu->type->num_boxes == 1) {
if (strlen(pmu->type->name) > 0)
@@ -3502,6 +3869,28 @@ static int __init uncore_pci_init(void)
pci_uncores = ivt_pci_uncores;
uncore_pci_driver = &ivt_uncore_pci_driver;
break;
case 42: /* Sandy Bridge */
ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC);
if (ret)
return ret;
pci_uncores = snb_pci_uncores;
uncore_pci_driver = &snb_uncore_pci_driver;
break;
case 58: /* Ivy Bridge */
ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC);
if (ret)
return ret;
pci_uncores = snb_pci_uncores;
uncore_pci_driver = &ivb_uncore_pci_driver;
break;
case 60: /* Haswell */
case 69: /* Haswell Celeron */
ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC);
if (ret)
return ret;
pci_uncores = snb_pci_uncores;
uncore_pci_driver = &hsw_uncore_pci_driver;
break;
default:
return 0;
}
@@ -3773,7 +4162,7 @@ static void __init uncore_cpu_setup(void *dummy)

static int __init uncore_cpu_init(void)
{
int ret, cpu, max_cores;
int ret, max_cores;

max_cores = boot_cpu_data.x86_max_cores;
switch (boot_cpu_data.x86_model) {
@@ -3817,29 +4206,6 @@ static int __init uncore_cpu_init(void)
if (ret)
return ret;

get_online_cpus();

for_each_online_cpu(cpu) {
int i, phys_id = topology_physical_package_id(cpu);

for_each_cpu(i, &uncore_cpu_mask) {
if (phys_id == topology_physical_package_id(i)) {
phys_id = -1;
break;
}
}
if (phys_id < 0)
continue;

uncore_cpu_prepare(cpu, phys_id);
uncore_event_init_cpu(cpu);
}
on_each_cpu(uncore_cpu_setup, NULL, 1);

register_cpu_notifier(&uncore_cpu_nb);

put_online_cpus();

return 0;
}

@@ -3868,6 +4234,41 @@ static int __init uncore_pmus_register(void)
return 0;
}

static void __init uncore_cpumask_init(void)
{
int cpu;

/*
* ony invoke once from msr or pci init code
*/
if (!cpumask_empty(&uncore_cpu_mask))
return;

get_online_cpus();

for_each_online_cpu(cpu) {
int i, phys_id = topology_physical_package_id(cpu);

for_each_cpu(i, &uncore_cpu_mask) {
if (phys_id == topology_physical_package_id(i)) {
phys_id = -1;
break;
}
}
if (phys_id < 0)
continue;

uncore_cpu_prepare(cpu, phys_id);
uncore_event_init_cpu(cpu);
}
on_each_cpu(uncore_cpu_setup, NULL, 1);

register_cpu_notifier(&uncore_cpu_nb);

put_online_cpus();
}


static int __init intel_uncore_init(void)
{
int ret;
@@ -3886,6 +4287,7 @@ static int __init intel_uncore_init(void)
uncore_pci_exit();
goto fail;
}
uncore_cpumask_init();

uncore_pmus_register();
return 0;

@@ -6,6 +6,7 @@

#define UNCORE_PMU_NAME_LEN 32
#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)

#define UNCORE_FIXED_EVENT 0xff
#define UNCORE_PMC_IDX_MAX_GENERIC 8
@@ -440,6 +441,7 @@ struct intel_uncore_type {
struct intel_uncore_ops *ops;
struct uncore_event_desc *event_descs;
const struct attribute_group *attr_groups[4];
struct pmu *pmu; /* for custom pmu ops */
};

#define pmu_group attr_groups[0]
@@ -488,8 +490,11 @@ struct intel_uncore_box {
u64 tags[UNCORE_PMC_IDX_MAX];
struct pci_dev *pci_dev;
struct intel_uncore_pmu *pmu;
u64 hrtimer_duration; /* hrtimer timeout for this box */
struct hrtimer hrtimer;
struct list_head list;
struct list_head active_list;
void *io_addr;
struct intel_uncore_extra_reg shared_regs[0];
};

@@ -1257,7 +1257,24 @@ again:
pass++;
goto again;
}

/*
* Perf does test runs to see if a whole group can be assigned
* together succesfully. There can be multiple rounds of this.
* Unfortunately, p4_pmu_swap_config_ts touches the hwc->config
* bits, such that the next round of group assignments will
* cause the above p4_should_swap_ts to pass instead of fail.
* This leads to counters exclusive to thread0 being used by
* thread1.
*
* Solve this with a cheap hack, reset the idx back to -1 to
* force a new lookup (p4_next_cntr) to get the right counter
* for the right thread.
*
* This probably doesn't comply with the general spirit of how
* perf wants to work, but P4 is special. :-(
*/
if (p4_should_swap_ts(hwc->config, cpu))
hwc->idx = -1;
p4_pmu_swap_config_ts(hwc, cpu);
if (assign)
assign[i] = cntr_idx;
@@ -1322,6 +1339,7 @@ static __initconst const struct x86_pmu p4_pmu = {
__init int p4_pmu_init(void)
{
unsigned int low, high;
int i, reg;

/* If we get stripped -- indexing fails */
BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
@@ -1340,5 +1358,19 @@ __init int p4_pmu_init(void)

x86_pmu = p4_pmu;

/*
* Even though the counters are configured to interrupt a particular
* logical processor when an overflow happens, testing has shown that
* on kdump kernels (which uses a single cpu), thread1's counter
* continues to run and will report an NMI on thread0. Due to the
* overflow bug, this leads to a stream of unknown NMIs.
*
* Solve this by zero'ing out the registers to mimic a reset.
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
reg = x86_pmu_config_addr(i);
wrmsrl_safe(reg, 0ULL);
}

return 0;
}

@@ -87,6 +87,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
#define nmi_to_desc(type) (&nmi_desc[type])

static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;

static int __init nmi_warning_debugfs(void)
{
debugfs_create_u64("nmi_longest_ns", 0644,
@@ -95,6 +96,20 @@ static int __init nmi_warning_debugfs(void)
}
fs_initcall(nmi_warning_debugfs);

static void nmi_max_handler(struct irq_work *w)
{
struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
int remainder_ns, decimal_msecs;
u64 whole_msecs = ACCESS_ONCE(a->max_duration);

remainder_ns = do_div(whole_msecs, (1000 * 1000));
decimal_msecs = remainder_ns / 1000;

printk_ratelimited(KERN_INFO
"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
a->handler, whole_msecs, decimal_msecs);
}

static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
struct nmi_desc *desc = nmi_to_desc(type);
@@ -110,26 +125,20 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
* to handle those situations.
*/
list_for_each_entry_rcu(a, &desc->head, list) {
u64 before, delta, whole_msecs;
int remainder_ns, decimal_msecs, thishandled;
int thishandled;
u64 delta;

before = sched_clock();
delta = sched_clock();
thishandled = a->handler(type, regs);
handled += thishandled;
delta = sched_clock() - before;
delta = sched_clock() - delta;
trace_nmi_handler(a->handler, (int)delta, thishandled);

if (delta < nmi_longest_ns)
if (delta < nmi_longest_ns || delta < a->max_duration)
continue;

nmi_longest_ns = delta;
whole_msecs = delta;
remainder_ns = do_div(whole_msecs, (1000 * 1000));
decimal_msecs = remainder_ns / 1000;
printk_ratelimited(KERN_INFO
"INFO: NMI handler (%ps) took too long to run: "
"%lld.%03d msecs\n", a->handler, whole_msecs,
decimal_msecs);
a->max_duration = delta;
irq_work_queue(&a->irq_work);
}

rcu_read_unlock();
@@ -146,6 +155,8 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
if (!action->handler)
return -EINVAL;

init_irq_work(&action->irq_work, nmi_max_handler);

spin_lock_irqsave(&desc->lock, flags);

/*