Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf changes from Ingo Molnar:
 "Main changes:

  Kernel side changes:

   - Add SNB/IVB/HSW client uncore memory controller support (Stephane
     Eranian)

   - Fix various x86/P4 PMU driver bugs (Don Zickus)

  Tooling, user visible changes:

   - Add several futex 'perf bench' microbenchmarks (Davidlohr Bueso)

   - Speed up thread map generation (Don Zickus)

   - Introduce 'perf kvm --list-cmds' command line option for use by
     scripts (Ramkumar Ramachandra)

   - Print the evsel name in the annotate stdio output, prep to fix
     support outputting annotation for multiple events, not just for the
     first one (Arnaldo Carvalho de Melo)

   - Allow setting preferred callchain method in .perfconfig (Jiri Olsa)

   - Show in what binaries/modules 'perf probe's are set (Masami
     Hiramatsu)

   - Support distro-style debuginfo for uprobe in 'perf probe' (Masami
     Hiramatsu)

  Tooling, internal changes and fixes:

   - Use tid in mmap/mmap2 events to find maps (Don Zickus)

   - Record the reason for filtering an address_location (Namhyung Kim)

   - Apply all filters to an addr_location (Namhyung Kim)

   - Merge al->filtered with hist_entry->filtered in report/hists
     (Namhyung Kim)

   - Fix memory leak when synthesizing thread records (Namhyung Kim)

   - Use ui__has_annotation() in 'report' (Namhyung Kim)

   - hists browser refactorings to reuse code accross UIs (Namhyung Kim)

   - Add support for the new DWARF unwinder library in elfutils (Jiri
     Olsa)

   - Fix build race in the generation of bison files (Jiri Olsa)

   - Further streamline the feature detection display, trimming it a bit
     to show just the libraries detected, using VF=1 gets a more verbose
     output, showing the less interesting feature checks as well (Jiri
     Olsa).

   - Check compatible symtab type before loading dso (Namhyung Kim)

   - Check return value of filename__read_debuglink() (Stephane Eranian)

   - Move some hashing and fs related code from tools/perf/util/ to
     tools/lib/ so that it can be used by more tools/ living utilities
     (Borislav Petkov)

   - Prepare DWARF unwinding code for using an elfutils alternative
     unwinding library (Jiri Olsa)

   - Fix DWARF unwind max_stack processing (Jiri Olsa)

   - Add dwarf unwind 'perf test' entry (Jiri Olsa)

   - 'perf probe' improvements including memory leak fixes, sharing the
     intlist class with other tools, uprobes/kprobes code sharing and
     use of ref_reloc_sym (Masami Hiramatsu)

   - Shorten sample symbol resolving by adding cpumode to struct
     addr_location (Arnaldo Carvalho de Melo)

   - Fix synthesizing mmaps for threads (Don Zickus)

   - Fix invalid output on event group stdio report (Namhyung Kim)

   - Fixup header alignment in 'perf sched latency' output (Ramkumar
     Ramachandra)

   - Fix off-by-one error in 'perf timechart record' argv handling
     (Ramkumar Ramachandra)

  Tooling, cleanups:

   - Remove unused thread__find_map function (Jiri Olsa)

   - Remove unused simple_strtoul() function (Ramkumar Ramachandra)

  Tooling, documentation updates:

   - Update function names in debug messages (Ramkumar Ramachandra)

   - Update some code references in design.txt (Ramkumar Ramachandra)

   - Clarify load-latency information in the 'perf mem' docs (Andi
     Kleen)

   - Clarify x86 register naming in 'perf probe' docs (Andi Kleen)"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (96 commits)
  perf tools: Remove unused simple_strtoul() function
  perf tools: Update some code references in design.txt
  perf evsel: Update function names in debug messages
  perf tools: Remove thread__find_map function
  perf annotate: Print the evsel name in the stdio output
  perf report: Use ui__has_annotation()
  perf tools: Fix memory leak when synthesizing thread records
  perf tools: Use tid in mmap/mmap2 events to find maps
  perf report: Merge al->filtered with hist_entry->filtered
  perf symbols: Apply all filters to an addr_location
  perf symbols: Record the reason for filtering an address_location
  perf sched: Fixup header alignment in 'latency' output
  perf timechart: Fix off-by-one error in 'record' argv handling
  perf machine: Factor machine__find_thread to take tid argument
  perf tools: Speed up thread map generation
  perf kvm: introduce --list-cmds for use by scripts
  perf ui hists: Pass evsel to hpp->header/width functions explicitly
  perf symbols: Introduce thread__find_cpumode_addr_location
  perf session: Change header.misc dump from decimal to hex
  perf ui/tui: Reuse generic __hpp__fmt() code
  ...
This commit is contained in:
Linus Torvalds
2014-03-31 11:13:25 -07:00
102 changed files with 3493 additions and 1310 deletions

View File

@@ -892,7 +892,6 @@ static void x86_pmu_enable(struct pmu *pmu)
* hw_perf_group_sched_in() or x86_pmu_enable()
*
* step1: save events moving to new counters
* step2: reprogram moved events into new counters
*/
for (i = 0; i < n_running; i++) {
event = cpuc->event_list[i];
@@ -918,6 +917,9 @@ static void x86_pmu_enable(struct pmu *pmu)
x86_pmu_stop(event, PERF_EF_UPDATE);
}
/*
* step2: reprogram moved events into new counters
*/
for (i = 0; i < cpuc->n_events; i++) {
event = cpuc->event_list[i];
hwc = &event->hw;
@@ -1043,7 +1045,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be performed
* at commit time (->commit_txn) as a whole
* at commit time (->commit_txn) as a whole.
*/
if (cpuc->group_flag & PERF_EVENT_TXN)
goto done_collect;
@@ -1058,6 +1060,10 @@ static int x86_pmu_add(struct perf_event *event, int flags)
memcpy(cpuc->assign, assign, n*sizeof(int));
done_collect:
/*
* Commit the collect_events() state. See x86_pmu_del() and
* x86_pmu_*_txn().
*/
cpuc->n_events = n;
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;
@@ -1183,28 +1189,38 @@ static void x86_pmu_del(struct perf_event *event, int flags)
* If we're called during a txn, we don't need to do anything.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
*
* XXX assumes any ->del() called during a TXN will only be on
* an event added during that same TXN.
*/
if (cpuc->group_flag & PERF_EVENT_TXN)
return;
/*
* Not a TXN, therefore cleanup properly.
*/
x86_pmu_stop(event, PERF_EF_UPDATE);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event_list[i]) {
if (i >= cpuc->n_events - cpuc->n_added)
--cpuc->n_added;
if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(cpuc, event);
while (++i < cpuc->n_events)
cpuc->event_list[i-1] = cpuc->event_list[i];
--cpuc->n_events;
if (event == cpuc->event_list[i])
break;
}
}
if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
return;
/* If we have a newly added event; make sure to decrease n_added. */
if (i >= cpuc->n_events - cpuc->n_added)
--cpuc->n_added;
if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(cpuc, event);
/* Delete the array entry. */
while (++i < cpuc->n_events)
cpuc->event_list[i-1] = cpuc->event_list[i];
--cpuc->n_events;
perf_event_update_userpage(event);
}
@@ -1598,7 +1614,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
{
__this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
/*
* Truncate the collected events.
* Truncate collected array by the number of events added in this
* transaction. See x86_pmu_add() and x86_pmu_*_txn().
*/
__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
@@ -1609,6 +1626,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
* Commit group events scheduling transaction
* Perform the group schedulability test as a whole
* Return 0 if success
*
* Does not cancel the transaction on failure; expects the caller to do this.
*/
static int x86_pmu_commit_txn(struct pmu *pmu)
{

View File

@@ -130,9 +130,11 @@ struct cpu_hw_events {
unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
int enabled;
int n_events;
int n_added;
int n_txn;
int n_events; /* the # of events in the below arrays */
int n_added; /* the # last events in the below arrays;
they've never been enabled yet */
int n_txn; /* the # last events in the below arrays;
added in the current transaction */
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
u64 tags[X86_PMC_IDX_MAX];
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */

View File

@@ -66,6 +66,47 @@ DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
static void uncore_pmu_event_read(struct perf_event *event);
static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}
static struct intel_uncore_box *
uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
struct intel_uncore_box *box;
box = *per_cpu_ptr(pmu->box, cpu);
if (box)
return box;
raw_spin_lock(&uncore_box_lock);
list_for_each_entry(box, &pmu->box_list, list) {
if (box->phys_id == topology_physical_package_id(cpu)) {
atomic_inc(&box->refcnt);
*per_cpu_ptr(pmu->box, cpu) = box;
break;
}
}
raw_spin_unlock(&uncore_box_lock);
return *per_cpu_ptr(pmu->box, cpu);
}
static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
/*
* perf core schedules event on the basis of cpu, uncore events are
* collected by one of the cpus inside a physical package.
*/
return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}
static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
u64 count;
@@ -1639,6 +1680,349 @@ static struct intel_uncore_type *snb_msr_uncores[] = {
&snb_uncore_cbox,
NULL,
};
enum {
SNB_PCI_UNCORE_IMC,
};
static struct uncore_event_desc snb_uncore_imc_events[] = {
INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"),
INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
{ /* end: all zeroes */ },
};
#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff
#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48
/* page size multiple covering all config regs */
#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000
#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1
#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050
#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2
#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE
static struct attribute *snb_uncore_imc_formats_attr[] = {
&format_attr_event.attr,
NULL,
};
static struct attribute_group snb_uncore_imc_format_group = {
.name = "format",
.attrs = snb_uncore_imc_formats_attr,
};
static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
resource_size_t addr;
u32 pci_dword;
pci_read_config_dword(pdev, where, &pci_dword);
addr = pci_dword;
#ifdef CONFIG_PHYS_ADDR_T_64BIT
pci_read_config_dword(pdev, where + 4, &pci_dword);
addr |= ((resource_size_t)pci_dword << 32);
#endif
addr &= ~(PAGE_SIZE - 1);
box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
}
static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
{}
static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
{}
static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
{}
static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
{}
static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
}
/*
* custom event_init() function because we define our own fixed, free
* running counters, so we do not want to conflict with generic uncore
* logic. Also simplifies processing
*/
static int snb_uncore_imc_event_init(struct perf_event *event)
{
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
struct hw_perf_event *hwc = &event->hw;
u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
int idx, base;
if (event->attr.type != event->pmu->type)
return -ENOENT;
pmu = uncore_event_to_pmu(event);
/* no device found for this pmu */
if (pmu->func_id < 0)
return -ENOENT;
/* Sampling not supported yet */
if (hwc->sample_period)
return -EINVAL;
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest ||
event->attr.sample_period) /* no sampling */
return -EINVAL;
/*
* Place all uncore events for a particular physical package
* onto a single cpu
*/
if (event->cpu < 0)
return -EINVAL;
/* check only supported bits are set */
if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
return -EINVAL;
box = uncore_pmu_to_box(pmu, event->cpu);
if (!box || box->cpu < 0)
return -EINVAL;
event->cpu = box->cpu;
event->hw.idx = -1;
event->hw.last_tag = ~0ULL;
event->hw.extra_reg.idx = EXTRA_REG_NONE;
event->hw.branch_reg.idx = EXTRA_REG_NONE;
/*
* check event is known (whitelist, determines counter)
*/
switch (cfg) {
case SNB_UNCORE_PCI_IMC_DATA_READS:
base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
idx = UNCORE_PMC_IDX_FIXED;
break;
case SNB_UNCORE_PCI_IMC_DATA_WRITES:
base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
idx = UNCORE_PMC_IDX_FIXED + 1;
break;
default:
return -EINVAL;
}
/* must be done before validate_group */
event->hw.event_base = base;
event->hw.config = cfg;
event->hw.idx = idx;
/* no group validation needed, we have free running counters */
return 0;
}
static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
{
return 0;
}
static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
u64 count;
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
return;
event->hw.state = 0;
box->n_active++;
list_add_tail(&event->active_entry, &box->active_list);
count = snb_uncore_imc_read_counter(box, event);
local64_set(&event->hw.prev_count, count);
if (box->n_active == 1)
uncore_pmu_start_hrtimer(box);
}
static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
struct hw_perf_event *hwc = &event->hw;
if (!(hwc->state & PERF_HES_STOPPED)) {
box->n_active--;
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
list_del(&event->active_entry);
if (box->n_active == 0)
uncore_pmu_cancel_hrtimer(box);
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
/*
* Drain the remaining delta count out of a event
* that we are disabling:
*/
uncore_perf_event_update(box, event);
hwc->state |= PERF_HES_UPTODATE;
}
}
static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
struct hw_perf_event *hwc = &event->hw;
if (!box)
return -ENODEV;
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (!(flags & PERF_EF_START))
hwc->state |= PERF_HES_ARCH;
snb_uncore_imc_event_start(event, 0);
box->n_events++;
return 0;
}
static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
int i;
snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
for (i = 0; i < box->n_events; i++) {
if (event == box->event_list[i]) {
--box->n_events;
break;
}
}
}
static int snb_pci2phy_map_init(int devid)
{
struct pci_dev *dev = NULL;
int bus;
dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
if (!dev)
return -ENOTTY;
bus = dev->bus->number;
pcibus_to_physid[bus] = 0;
pci_dev_put(dev);
return 0;
}
static struct pmu snb_uncore_imc_pmu = {
.task_ctx_nr = perf_invalid_context,
.event_init = snb_uncore_imc_event_init,
.add = snb_uncore_imc_event_add,
.del = snb_uncore_imc_event_del,
.start = snb_uncore_imc_event_start,
.stop = snb_uncore_imc_event_stop,
.read = uncore_pmu_event_read,
};
static struct intel_uncore_ops snb_uncore_imc_ops = {
.init_box = snb_uncore_imc_init_box,
.enable_box = snb_uncore_imc_enable_box,
.disable_box = snb_uncore_imc_disable_box,
.disable_event = snb_uncore_imc_disable_event,
.enable_event = snb_uncore_imc_enable_event,
.hw_config = snb_uncore_imc_hw_config,
.read_counter = snb_uncore_imc_read_counter,
};
static struct intel_uncore_type snb_uncore_imc = {
.name = "imc",
.num_counters = 2,
.num_boxes = 1,
.fixed_ctr_bits = 32,
.fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE,
.event_descs = snb_uncore_imc_events,
.format_group = &snb_uncore_imc_format_group,
.perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
.event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK,
.ops = &snb_uncore_imc_ops,
.pmu = &snb_uncore_imc_pmu,
};
static struct intel_uncore_type *snb_pci_uncores[] = {
[SNB_PCI_UNCORE_IMC] = &snb_uncore_imc,
NULL,
};
static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = {
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* end: all zeroes */ },
};
static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = {
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* end: all zeroes */ },
};
static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = {
{ /* IMC */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
{ /* end: all zeroes */ },
};
static struct pci_driver snb_uncore_pci_driver = {
.name = "snb_uncore",
.id_table = snb_uncore_pci_ids,
};
static struct pci_driver ivb_uncore_pci_driver = {
.name = "ivb_uncore",
.id_table = ivb_uncore_pci_ids,
};
static struct pci_driver hsw_uncore_pci_driver = {
.name = "hsw_uncore",
.id_table = hsw_uncore_pci_ids,
};
/* end of Sandy Bridge uncore support */
/* Nehalem uncore support */
@@ -2789,6 +3173,7 @@ again:
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
struct intel_uncore_box *box;
struct perf_event *event;
unsigned long flags;
int bit;
@@ -2801,19 +3186,27 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
*/
local_irq_save(flags);
/*
* handle boxes with an active event list as opposed to active
* counters
*/
list_for_each_entry(event, &box->active_list, active_entry) {
uncore_perf_event_update(box, event);
}
for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
uncore_perf_event_update(box, box->events[bit]);
local_irq_restore(flags);
hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL));
hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
return HRTIMER_RESTART;
}
static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
__hrtimer_start_range_ns(&box->hrtimer,
ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
ns_to_ktime(box->hrtimer_duration), 0,
HRTIMER_MODE_REL_PINNED, 0);
}
@@ -2847,45 +3240,14 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
box->cpu = -1;
box->phys_id = -1;
/* set default hrtimer timeout */
box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
INIT_LIST_HEAD(&box->active_list);
return box;
}
static struct intel_uncore_box *
uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
struct intel_uncore_box *box;
box = *per_cpu_ptr(pmu->box, cpu);
if (box)
return box;
raw_spin_lock(&uncore_box_lock);
list_for_each_entry(box, &pmu->box_list, list) {
if (box->phys_id == topology_physical_package_id(cpu)) {
atomic_inc(&box->refcnt);
*per_cpu_ptr(pmu->box, cpu) = box;
break;
}
}
raw_spin_unlock(&uncore_box_lock);
return *per_cpu_ptr(pmu->box, cpu);
}
static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}
static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
/*
* perf core schedules event on the basis of cpu, uncore events are
* collected by one of the cpus inside a physical package.
*/
return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}
static int
uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
{
@@ -3279,16 +3641,21 @@ static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
int ret;
pmu->pmu = (struct pmu) {
.attr_groups = pmu->type->attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = uncore_pmu_event_init,
.add = uncore_pmu_event_add,
.del = uncore_pmu_event_del,
.start = uncore_pmu_event_start,
.stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read,
};
if (!pmu->type->pmu) {
pmu->pmu = (struct pmu) {
.attr_groups = pmu->type->attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = uncore_pmu_event_init,
.add = uncore_pmu_event_add,
.del = uncore_pmu_event_del,
.start = uncore_pmu_event_start,
.stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read,
};
} else {
pmu->pmu = *pmu->type->pmu;
pmu->pmu.attr_groups = pmu->type->attr_groups;
}
if (pmu->type->num_boxes == 1) {
if (strlen(pmu->type->name) > 0)
@@ -3502,6 +3869,28 @@ static int __init uncore_pci_init(void)
pci_uncores = ivt_pci_uncores;
uncore_pci_driver = &ivt_uncore_pci_driver;
break;
case 42: /* Sandy Bridge */
ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC);
if (ret)
return ret;
pci_uncores = snb_pci_uncores;
uncore_pci_driver = &snb_uncore_pci_driver;
break;
case 58: /* Ivy Bridge */
ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC);
if (ret)
return ret;
pci_uncores = snb_pci_uncores;
uncore_pci_driver = &ivb_uncore_pci_driver;
break;
case 60: /* Haswell */
case 69: /* Haswell Celeron */
ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC);
if (ret)
return ret;
pci_uncores = snb_pci_uncores;
uncore_pci_driver = &hsw_uncore_pci_driver;
break;
default:
return 0;
}
@@ -3773,7 +4162,7 @@ static void __init uncore_cpu_setup(void *dummy)
static int __init uncore_cpu_init(void)
{
int ret, cpu, max_cores;
int ret, max_cores;
max_cores = boot_cpu_data.x86_max_cores;
switch (boot_cpu_data.x86_model) {
@@ -3817,29 +4206,6 @@ static int __init uncore_cpu_init(void)
if (ret)
return ret;
get_online_cpus();
for_each_online_cpu(cpu) {
int i, phys_id = topology_physical_package_id(cpu);
for_each_cpu(i, &uncore_cpu_mask) {
if (phys_id == topology_physical_package_id(i)) {
phys_id = -1;
break;
}
}
if (phys_id < 0)
continue;
uncore_cpu_prepare(cpu, phys_id);
uncore_event_init_cpu(cpu);
}
on_each_cpu(uncore_cpu_setup, NULL, 1);
register_cpu_notifier(&uncore_cpu_nb);
put_online_cpus();
return 0;
}
@@ -3868,6 +4234,41 @@ static int __init uncore_pmus_register(void)
return 0;
}
static void __init uncore_cpumask_init(void)
{
int cpu;
/*
* ony invoke once from msr or pci init code
*/
if (!cpumask_empty(&uncore_cpu_mask))
return;
get_online_cpus();
for_each_online_cpu(cpu) {
int i, phys_id = topology_physical_package_id(cpu);
for_each_cpu(i, &uncore_cpu_mask) {
if (phys_id == topology_physical_package_id(i)) {
phys_id = -1;
break;
}
}
if (phys_id < 0)
continue;
uncore_cpu_prepare(cpu, phys_id);
uncore_event_init_cpu(cpu);
}
on_each_cpu(uncore_cpu_setup, NULL, 1);
register_cpu_notifier(&uncore_cpu_nb);
put_online_cpus();
}
static int __init intel_uncore_init(void)
{
int ret;
@@ -3886,6 +4287,7 @@ static int __init intel_uncore_init(void)
uncore_pci_exit();
goto fail;
}
uncore_cpumask_init();
uncore_pmus_register();
return 0;

View File

@@ -6,6 +6,7 @@
#define UNCORE_PMU_NAME_LEN 32
#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)
#define UNCORE_FIXED_EVENT 0xff
#define UNCORE_PMC_IDX_MAX_GENERIC 8
@@ -440,6 +441,7 @@ struct intel_uncore_type {
struct intel_uncore_ops *ops;
struct uncore_event_desc *event_descs;
const struct attribute_group *attr_groups[4];
struct pmu *pmu; /* for custom pmu ops */
};
#define pmu_group attr_groups[0]
@@ -488,8 +490,11 @@ struct intel_uncore_box {
u64 tags[UNCORE_PMC_IDX_MAX];
struct pci_dev *pci_dev;
struct intel_uncore_pmu *pmu;
u64 hrtimer_duration; /* hrtimer timeout for this box */
struct hrtimer hrtimer;
struct list_head list;
struct list_head active_list;
void *io_addr;
struct intel_uncore_extra_reg shared_regs[0];
};

View File

@@ -1257,7 +1257,24 @@ again:
pass++;
goto again;
}
/*
* Perf does test runs to see if a whole group can be assigned
* together succesfully. There can be multiple rounds of this.
* Unfortunately, p4_pmu_swap_config_ts touches the hwc->config
* bits, such that the next round of group assignments will
* cause the above p4_should_swap_ts to pass instead of fail.
* This leads to counters exclusive to thread0 being used by
* thread1.
*
* Solve this with a cheap hack, reset the idx back to -1 to
* force a new lookup (p4_next_cntr) to get the right counter
* for the right thread.
*
* This probably doesn't comply with the general spirit of how
* perf wants to work, but P4 is special. :-(
*/
if (p4_should_swap_ts(hwc->config, cpu))
hwc->idx = -1;
p4_pmu_swap_config_ts(hwc, cpu);
if (assign)
assign[i] = cntr_idx;
@@ -1322,6 +1339,7 @@ static __initconst const struct x86_pmu p4_pmu = {
__init int p4_pmu_init(void)
{
unsigned int low, high;
int i, reg;
/* If we get stripped -- indexing fails */
BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
@@ -1340,5 +1358,19 @@ __init int p4_pmu_init(void)
x86_pmu = p4_pmu;
/*
* Even though the counters are configured to interrupt a particular
* logical processor when an overflow happens, testing has shown that
* on kdump kernels (which uses a single cpu), thread1's counter
* continues to run and will report an NMI on thread0. Due to the
* overflow bug, this leads to a stream of unknown NMIs.
*
* Solve this by zero'ing out the registers to mimic a reset.
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
reg = x86_pmu_config_addr(i);
wrmsrl_safe(reg, 0ULL);
}
return 0;
}

View File

@@ -87,6 +87,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
#define nmi_to_desc(type) (&nmi_desc[type])
static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;
static int __init nmi_warning_debugfs(void)
{
debugfs_create_u64("nmi_longest_ns", 0644,
@@ -95,6 +96,20 @@ static int __init nmi_warning_debugfs(void)
}
fs_initcall(nmi_warning_debugfs);
static void nmi_max_handler(struct irq_work *w)
{
struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
int remainder_ns, decimal_msecs;
u64 whole_msecs = ACCESS_ONCE(a->max_duration);
remainder_ns = do_div(whole_msecs, (1000 * 1000));
decimal_msecs = remainder_ns / 1000;
printk_ratelimited(KERN_INFO
"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
a->handler, whole_msecs, decimal_msecs);
}
static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
struct nmi_desc *desc = nmi_to_desc(type);
@@ -110,26 +125,20 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
* to handle those situations.
*/
list_for_each_entry_rcu(a, &desc->head, list) {
u64 before, delta, whole_msecs;
int remainder_ns, decimal_msecs, thishandled;
int thishandled;
u64 delta;
before = sched_clock();
delta = sched_clock();
thishandled = a->handler(type, regs);
handled += thishandled;
delta = sched_clock() - before;
delta = sched_clock() - delta;
trace_nmi_handler(a->handler, (int)delta, thishandled);
if (delta < nmi_longest_ns)
if (delta < nmi_longest_ns || delta < a->max_duration)
continue;
nmi_longest_ns = delta;
whole_msecs = delta;
remainder_ns = do_div(whole_msecs, (1000 * 1000));
decimal_msecs = remainder_ns / 1000;
printk_ratelimited(KERN_INFO
"INFO: NMI handler (%ps) took too long to run: "
"%lld.%03d msecs\n", a->handler, whole_msecs,
decimal_msecs);
a->max_duration = delta;
irq_work_queue(&a->irq_work);
}
rcu_read_unlock();
@@ -146,6 +155,8 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
if (!action->handler)
return -EINVAL;
init_irq_work(&action->irq_work, nmi_max_handler);
spin_lock_irqsave(&desc->lock, flags);
/*