Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (49 commits)
  perfcounter: Handle some IO return values
  perf_counter: Push perf_sample_data through the swcounter code
  perf_counter tools: Define and use our own u64, s64 etc. definitions
  perf_counter: Close race in perf_lock_task_context()
  perf_counter, x86: Improve interactions with fast-gup
  perf_counter: Simplify and fix task migration counting
  perf_counter tools: Add a data file header
  perf_counter: Update userspace callchain sampling uses
  perf_counter: Make callchain samples extensible
  perf report: Filter to parent set by default
  perf_counter tools: Handle lost events
  perf_counter: Add event overflow handling
  fs: Provide empty .set_page_dirty() aop for anon inodes
  perf_counter: tools: Makefile tweaks for 64-bit powerpc
  perf_counter: powerpc: Add processor back-end for MPC7450 family
  perf_counter: powerpc: Make powerpc perf_counter code safe for 32-bit kernels
  perf_counter: powerpc: Change how processor-specific back-ends get selected
  perf_counter: powerpc: Use unsigned long for register and constraint values
  perf_counter: powerpc: Enable use of software counters on 32-bit powerpc
  perf_counter tools: Add and use isprint()
  ...
This commit is contained in:
Linus Torvalds
2009-06-20 11:29:32 -07:00
40 changed files with 2327 additions and 898 deletions

View File

@@ -19,6 +19,7 @@
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event)
return event & CORE_EVNTSEL_MASK;
}
static const u64 amd_0f_hw_cache_event_ids
static const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
[ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
},
},
[ C(L1I ) ] = {
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
[ C(RESULT_MISS) ] = 0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
[ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
@@ -1223,6 +1224,8 @@ again:
if (!intel_pmu_save_and_restart(counter))
continue;
data.period = counter->hw.last_period;
if (perf_counter_overflow(counter, 1, &data))
intel_pmu_disable_counter(&counter->hw, bit);
}
@@ -1459,18 +1462,16 @@ static int intel_pmu_init(void)
static int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;
x86_pmu = amd_pmu;
switch (boot_cpu_data.x86) {
case 0x0f:
case 0x10:
case 0x11:
memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
pr_cont("AMD Family 0f/10/11 events, ");
break;
}
return 0;
}
@@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
*/
static inline
void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
if (entry->nr < MAX_STACK_DEPTH)
if (entry->nr < PERF_MAX_STACK_DEPTH)
entry->ip[entry->nr++] = ip;
}
@@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg)
static int backtrace_stack(void *data, char *name)
{
/* Don't bother with IRQ stacks for now */
return -1;
/* Process all stacks: */
return 0;
}
static void backtrace_address(void *data, unsigned long addr, int reliable)
@@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = {
.address = backtrace_address,
};
#include "../dumpstack.h"
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
unsigned long bp;
char *stack;
int nr = entry->nr;
callchain_store(entry, PERF_CONTEXT_KERNEL);
callchain_store(entry, regs->ip);
callchain_store(entry, instruction_pointer(regs));
stack = ((char *)regs + sizeof(struct pt_regs));
#ifdef CONFIG_FRAME_POINTER
bp = frame_pointer(regs);
#else
bp = 0;
#endif
dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
entry->kernel = entry->nr - nr;
dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
}
/*
* best effort, GUP based copy_from_user() that assumes IRQ or NMI context
*/
static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
unsigned long offset, addr = (unsigned long)from;
int type = in_nmi() ? KM_NMI : KM_IRQ0;
unsigned long size, len = 0;
struct page *page;
void *map;
int ret;
struct stack_frame {
const void __user *next_fp;
unsigned long return_address;
};
do {
ret = __get_user_pages_fast(addr, 1, 0, &page);
if (!ret)
break;
offset = addr & (PAGE_SIZE - 1);
size = min(PAGE_SIZE - offset, n - len);
map = kmap_atomic(page, type);
memcpy(to, map+offset, size);
kunmap_atomic(map, type);
put_page(page);
len += size;
to += size;
addr += size;
} while (len < n);
return len;
}
static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
int ret;
unsigned long bytes;
if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
return 0;
bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
ret = 1;
pagefault_disable();
if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
ret = 0;
pagefault_enable();
return ret;
return bytes == sizeof(*frame);
}
static void
@@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
struct stack_frame frame;
const void __user *fp;
int nr = entry->nr;
regs = (struct pt_regs *)current->thread.sp0 - 1;
fp = (void __user *)regs->bp;
if (!user_mode(regs))
regs = task_pt_regs(current);
fp = (void __user *)regs->bp;
callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, regs->ip);
while (entry->nr < MAX_STACK_DEPTH) {
frame.next_fp = NULL;
while (entry->nr < PERF_MAX_STACK_DEPTH) {
frame.next_frame = NULL;
frame.return_address = 0;
if (!copy_stack_frame(fp, &frame))
break;
if ((unsigned long)fp < user_stack_pointer(regs))
if ((unsigned long)fp < regs->sp)
break;
callchain_store(entry, frame.return_address);
fp = frame.next_fp;
fp = frame.next_frame;
}
entry->user = entry->nr - nr;
}
static void
@@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
entry = &__get_cpu_var(irq_entry);
entry->nr = 0;
entry->hv = 0;
entry->kernel = 0;
entry->user = 0;
perf_do_callchain(regs, entry);