Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (162 commits)
  tracing/kprobes: unregister_trace_probe needs to be called under mutex
  perf: expose event__process function
  perf events: Fix mmap offset determination
  perf, powerpc: fsl_emb: Restore setting perf_sample_data.period
  perf, powerpc: Convert the FSL driver to use local64_t
  perf tools: Don't keep unreferenced maps when unmaps are detected
  perf session: Invalidate last_match when removing threads from rb_tree
  perf session: Free the ref_reloc_sym memory at the right place
  x86,mmiotrace: Add support for tracing STOS instruction
  perf, sched migration: Librarize task states and event headers helpers
  perf, sched migration: Librarize the GUI class
  perf, sched migration: Make the GUI class client agnostic
  perf, sched migration: Make it vertically scrollable
  perf, sched migration: Parameterize cpu height and spacing
  perf, sched migration: Fix key bindings
  perf, sched migration: Ignore unhandled task states
  perf, sched migration: Handle ignored migrate out events
  perf: New migration tool overview
  tracing: Drop cpparg() macro
  perf: Use tracepoint_synchronize_unregister() to flush any pending tracepoint call
  ...

Fix up trivial conflicts in Makefile and drivers/cpufreq/cpufreq.c
Linus Torvalds, 2010-08-06 09:30:52 -07:00
179 files changed, 5610 insertions(+), 4808 deletions(-)


@@ -76,8 +76,8 @@ obj-$(CONFIG_GCOV_KERNEL) += gcov/
obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o


@@ -41,6 +41,7 @@
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/smp.h>
@@ -62,6 +63,9 @@ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
static int nr_slots[TYPE_MAX];
/* Keep track of the breakpoints attached to tasks */
static LIST_HEAD(bp_task_head);
static int constraints_initialized;
/* Gather the number of total pinned and un-pinned bp in a cpuset */
@@ -103,33 +107,21 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
return 0;
}
static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
/*
* Count the number of breakpoints of the same type and same task.
* The given event must be not on the list.
*/
static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
{
struct perf_event_context *ctx = tsk->perf_event_ctxp;
struct list_head *list;
struct perf_event *bp;
unsigned long flags;
struct perf_event_context *ctx = bp->ctx;
struct perf_event *iter;
int count = 0;
if (WARN_ONCE(!ctx, "No perf context for this task"))
return 0;
list = &ctx->event_list;
raw_spin_lock_irqsave(&ctx->lock, flags);
/*
* The current breakpoint counter is not included in the list
* at the open() callback time
*/
list_for_each_entry(bp, list, event_entry) {
if (bp->attr.type == PERF_TYPE_BREAKPOINT)
if (find_slot_idx(bp) == type)
count += hw_breakpoint_weight(bp);
list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
if (iter->ctx == ctx && find_slot_idx(iter) == type)
count += hw_breakpoint_weight(iter);
}
raw_spin_unlock_irqrestore(&ctx->lock, flags);
return count;
}
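The rewritten task_bp_pinned() walks a single global list (bp_task_head) and sums the weight of every breakpoint sharing the target event's context, instead of taking the context lock and scanning the task's own event list. A minimal userspace model of that counting loop, with hypothetical types and weights standing in for the kernel's structures:

#include <stdio.h>

struct ctx { int id; };

struct bp_event {
	struct ctx *ctx;        /* owning task's perf context */
	int type;               /* slot type index */
	int weight;             /* slots this breakpoint consumes */
	struct bp_event *next;  /* bp_task_head linkage */
};

static struct bp_event *bp_task_head;

/* Sum the weights of listed breakpoints sharing @bp's context and type.
 * As in the kernel version, @bp itself must not be on the list yet. */
static int task_bp_pinned(struct bp_event *bp, int type)
{
	int count = 0;

	for (struct bp_event *iter = bp_task_head; iter; iter = iter->next)
		if (iter->ctx == bp->ctx && iter->type == type)
			count += iter->weight;
	return count;
}

int main(void)
{
	struct ctx task_ctx = { 1 };
	struct bp_event a = { &task_ctx, 0, 1, NULL };
	struct bp_event b = { &task_ctx, 0, 1, &a };

	bp_task_head = &b;			/* list: b -> a */

	struct bp_event new_bp = { &task_ctx, 0, 1, NULL };
	printf("pinned for this task: %d\n", task_bp_pinned(&new_bp, 0));
	return 0;
}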
@@ -149,7 +141,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
if (!tsk)
slots->pinned += max_task_bp_pinned(cpu, type);
else
slots->pinned += task_bp_pinned(tsk, type);
slots->pinned += task_bp_pinned(bp, type);
slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
return;
@@ -162,7 +154,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
if (!tsk)
nr += max_task_bp_pinned(cpu, type);
else
nr += task_bp_pinned(tsk, type);
nr += task_bp_pinned(bp, type);
if (nr > slots->pinned)
slots->pinned = nr;
@@ -188,7 +180,7 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight)
/*
* Add a pinned breakpoint for the given task in our constraint table
*/
static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
enum bp_type_idx type, int weight)
{
unsigned int *tsk_pinned;
@@ -196,10 +188,11 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
int old_idx = 0;
int idx = 0;
old_count = task_bp_pinned(tsk, type);
old_count = task_bp_pinned(bp, type);
old_idx = old_count - 1;
idx = old_idx + weight;
/* tsk_pinned[n] is the number of tasks having n breakpoints */
tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
if (enable) {
tsk_pinned[idx]++;
@@ -222,23 +215,30 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
int cpu = bp->cpu;
struct task_struct *tsk = bp->ctx->task;
/* Pinned counter task profiling */
if (tsk) {
if (cpu >= 0) {
toggle_bp_task_slot(tsk, cpu, enable, type, weight);
return;
}
/* Pinned counter cpu profiling */
if (!tsk) {
for_each_online_cpu(cpu)
toggle_bp_task_slot(tsk, cpu, enable, type, weight);
if (enable)
per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
else
per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
return;
}
/* Pinned counter cpu profiling */
/* Pinned counter task profiling */
if (!enable)
list_del(&bp->hw.bp_list);
if (cpu >= 0) {
toggle_bp_task_slot(bp, cpu, enable, type, weight);
} else {
for_each_online_cpu(cpu)
toggle_bp_task_slot(bp, cpu, enable, type, weight);
}
if (enable)
per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
else
per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
list_add_tail(&bp->hw.bp_list, &bp_task_head);
}
/*
@@ -312,6 +312,10 @@ static int __reserve_bp_slot(struct perf_event *bp)
weight = hw_breakpoint_weight(bp);
fetch_bp_busy_slots(&slots, bp, type);
/*
* Simulate the addition of this breakpoint to the constraints
* and see the result.
*/
fetch_this_slot(&slots, weight);
/* Flexible counters need to keep at least one slot */

File diff suppressed because it is too large.


@@ -3726,7 +3726,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
* off of preempt_enable. Kernel preemptions off return from interrupt
* occur there and call schedule directly.
*/
asmlinkage void __sched preempt_schedule(void)
asmlinkage void __sched notrace preempt_schedule(void)
{
struct thread_info *ti = current_thread_info();
@@ -3738,9 +3738,9 @@ asmlinkage void __sched preempt_schedule(void)
return;
do {
add_preempt_count(PREEMPT_ACTIVE);
add_preempt_count_notrace(PREEMPT_ACTIVE);
schedule();
sub_preempt_count(PREEMPT_ACTIVE);
sub_preempt_count_notrace(PREEMPT_ACTIVE);
/*
* Check again in case we missed a preemption opportunity
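Marking preempt_schedule() notrace and switching to the _notrace preempt-count helpers keeps the function tracer from re-entering the very preemption path it is tracing. The same hazard can be reproduced in userspace with GCC's function instrumentation, where the profiling hooks must themselves be excluded from instrumentation; a small demo, assuming gcc with -finstrument-functions:

/* Build: gcc -finstrument-functions -o demo demo.c */
#include <stdio.h>

#define notrace __attribute__((no_instrument_function))

/* The hooks must be excluded from instrumentation; otherwise each hook
 * entry would itself be instrumented and recurse until the stack
 * overflows -- the userspace analogue of what notrace avoids here. */
notrace void __cyg_profile_func_enter(void *fn, void *call_site)
{
	fprintf(stderr, "enter %p from %p\n", fn, call_site);
}

notrace void __cyg_profile_func_exit(void *fn, void *call_site)
{
	fprintf(stderr, "exit  %p\n", fn);
}

static int work(int x)
{
	return x * 2;
}

int main(void)
{
	return work(21) == 42 ? 0 : 1;
}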


@@ -1,293 +0,0 @@
/*
* Detect Soft Lockups
*
* started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
*
* this code detects soft lockups: incidents in where on a CPU
* the kernel does not reschedule for 10 seconds or more.
*/
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <asm/irq_regs.h>
static DEFINE_SPINLOCK(print_lock);
static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(bool, softlock_touch_sync);
static int __read_mostly did_panic;
int __read_mostly softlockup_thresh = 60;
/*
* Should we panic (and reboot, if panic_timeout= is set) when a
* soft-lockup occurs:
*/
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
static int __init softlockup_panic_setup(char *str)
{
softlockup_panic = simple_strtoul(str, NULL, 0);
return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);
static int
softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
did_panic = 1;
return NOTIFY_DONE;
}
static struct notifier_block panic_block = {
.notifier_call = softlock_panic,
};
/*
* Returns seconds, approximately. We don't need nanosecond
* resolution, and we don't need to waste time with a big divide when
* 2^30ns == 1.074s.
*/
static unsigned long get_timestamp(int this_cpu)
{
return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
}
static void __touch_softlockup_watchdog(void)
{
int this_cpu = raw_smp_processor_id();
__raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu);
}
void touch_softlockup_watchdog(void)
{
__raw_get_cpu_var(softlockup_touch_ts) = 0;
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
void touch_softlockup_watchdog_sync(void)
{
__raw_get_cpu_var(softlock_touch_sync) = true;
__raw_get_cpu_var(softlockup_touch_ts) = 0;
}
void touch_all_softlockup_watchdogs(void)
{
int cpu;
/* Cause each CPU to re-update its timestamp rather than complain */
for_each_online_cpu(cpu)
per_cpu(softlockup_touch_ts, cpu) = 0;
}
EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
touch_all_softlockup_watchdogs();
return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
/*
* This callback runs from the timer interrupt, and checks
* whether the watchdog thread has hung or not:
*/
void softlockup_tick(void)
{
int this_cpu = smp_processor_id();
unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu);
unsigned long print_ts;
struct pt_regs *regs = get_irq_regs();
unsigned long now;
/* Is detection switched off? */
if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) {
/* Be sure we don't false trigger if switched back on */
if (touch_ts)
per_cpu(softlockup_touch_ts, this_cpu) = 0;
return;
}
if (touch_ts == 0) {
if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
/*
* If the time stamp was touched atomically
* make sure the scheduler tick is up to date.
*/
per_cpu(softlock_touch_sync, this_cpu) = false;
sched_clock_tick();
}
__touch_softlockup_watchdog();
return;
}
print_ts = per_cpu(softlockup_print_ts, this_cpu);
/* report at most once a second */
if (print_ts == touch_ts || did_panic)
return;
/* do not print during early bootup: */
if (unlikely(system_state != SYSTEM_RUNNING)) {
__touch_softlockup_watchdog();
return;
}
now = get_timestamp(this_cpu);
/*
* Wake up the high-prio watchdog task twice per
* threshold timespan.
*/
if (time_after(now - softlockup_thresh/2, touch_ts))
wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
/* Warn about unreasonable delays: */
if (time_before_eq(now - softlockup_thresh, touch_ts))
return;
per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
spin_lock(&print_lock);
printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
this_cpu, now - touch_ts,
current->comm, task_pid_nr(current));
print_modules();
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
spin_unlock(&print_lock);
if (softlockup_panic)
panic("softlockup: hung tasks");
}
/*
* The watchdog thread - runs every second and touches the timestamp.
*/
static int watchdog(void *__bind_cpu)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
sched_setscheduler(current, SCHED_FIFO, &param);
/* initialize timestamp */
__touch_softlockup_watchdog();
set_current_state(TASK_INTERRUPTIBLE);
/*
* Run briefly once per second to reset the softlockup timestamp.
* If this gets delayed for more than 60 seconds then the
* debug-printout triggers in softlockup_tick().
*/
while (!kthread_should_stop()) {
__touch_softlockup_watchdog();
schedule();
if (kthread_should_stop())
break;
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return 0;
}
/*
* Create/destroy watchdog threads as CPUs come and go:
*/
static int __cpuinit
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
int hotcpu = (unsigned long)hcpu;
struct task_struct *p;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
BUG_ON(per_cpu(softlockup_watchdog, hotcpu));
p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
if (IS_ERR(p)) {
printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
return NOTIFY_BAD;
}
per_cpu(softlockup_touch_ts, hotcpu) = 0;
per_cpu(softlockup_watchdog, hotcpu) = p;
kthread_bind(p, hotcpu);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
wake_up_process(per_cpu(softlockup_watchdog, hotcpu));
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
if (!per_cpu(softlockup_watchdog, hotcpu))
break;
/* Unbind so it can run. Fall thru. */
kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
cpumask_any(cpu_online_mask));
case CPU_DEAD:
case CPU_DEAD_FROZEN:
p = per_cpu(softlockup_watchdog, hotcpu);
per_cpu(softlockup_watchdog, hotcpu) = NULL;
kthread_stop(p);
break;
#endif /* CONFIG_HOTPLUG_CPU */
}
return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata cpu_nfb = {
.notifier_call = cpu_callback
};
static int __initdata nosoftlockup;
static int __init nosoftlockup_setup(char *str)
{
nosoftlockup = 1;
return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
static int __init spawn_softlockup_task(void)
{
void *cpu = (void *)(long)smp_processor_id();
int err;
if (nosoftlockup)
return 0;
err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
if (err == NOTIFY_BAD) {
BUG();
return 1;
}
cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
register_cpu_notifier(&cpu_nfb);
atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
return 0;
}
early_initcall(spawn_softlockup_task);


@@ -76,6 +76,10 @@
#include <scsi/sg.h>
#endif
#ifdef CONFIG_LOCKUP_DETECTOR
#include <linux/nmi.h>
#endif
#if defined(CONFIG_SYSCTL)
@@ -106,7 +110,7 @@ extern int blk_iopoll_enabled;
#endif
/* Constants used for minimum and maximum */
#ifdef CONFIG_DETECT_SOFTLOCKUP
#ifdef CONFIG_LOCKUP_DETECTOR
static int sixty = 60;
static int neg_one = -1;
#endif
@@ -710,7 +714,34 @@ static struct ctl_table kern_table[] = {
.mode = 0444,
.proc_handler = proc_dointvec,
},
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
#if defined(CONFIG_LOCKUP_DETECTOR)
{
.procname = "watchdog",
.data = &watchdog_enabled,
.maxlen = sizeof (int),
.mode = 0644,
.proc_handler = proc_dowatchdog_enabled,
},
{
.procname = "watchdog_thresh",
.data = &softlockup_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dowatchdog_thresh,
.extra1 = &neg_one,
.extra2 = &sixty,
},
{
.procname = "softlockup_panic",
.data = &softlockup_panic,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
{
.procname = "unknown_nmi_panic",
.data = &unknown_nmi_panic,
@@ -813,26 +844,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_DETECT_SOFTLOCKUP
{
.procname = "softlockup_panic",
.data = &softlockup_panic,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
{
.procname = "softlockup_thresh",
.data = &softlockup_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dosoftlockup_thresh,
.extra1 = &neg_one,
.extra2 = &sixty,
},
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
{
.procname = "hung_task_panic",


@@ -1302,7 +1302,6 @@ void run_local_timers(void)
{
hrtimer_run_queues();
raise_softirq(TIMER_SOFTIRQ);
softlockup_tick();
}
/*


@@ -194,15 +194,6 @@ config PREEMPT_TRACER
enabled. This option and the irqs-off timing option can be
used together or separately.)
config SYSPROF_TRACER
bool "Sysprof Tracer"
depends on X86
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
help
This tracer provides the trace needed by the 'Sysprof' userspace
tool.
config SCHED_TRACER
bool "Scheduling Latency Tracer"
select GENERIC_TRACER
@@ -229,23 +220,6 @@ config FTRACE_SYSCALLS
help
Basic tracer to catch the syscall entry and exit events.
config BOOT_TRACER
bool "Trace boot initcalls"
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
help
This tracer helps developers to optimize boot times: it records
the timings of the initcalls and traces key events and the identity
of tasks that can cause boot delays, such as context-switches.
Its aim is to be parsed by the scripts/bootgraph.pl tool to
produce pretty graphics about boot inefficiencies, giving a visual
representation of the delays during initcalls - but the raw
/debug/tracing/trace text output is readable too.
You must pass in initcall_debug and ftrace=initcall to the kernel
command line to enable this on bootup.
config TRACE_BRANCH_PROFILING
bool
select GENERIC_TRACER
@@ -325,28 +299,6 @@ config BRANCH_TRACER
Say N if unsure.
config KSYM_TRACER
bool "Trace read and write access on kernel memory locations"
depends on HAVE_HW_BREAKPOINT
select TRACING
help
This tracer helps find read and write operations on any given kernel
symbol i.e. /proc/kallsyms.
config PROFILE_KSYM_TRACER
bool "Profile all kernel memory accesses on 'watched' variables"
depends on KSYM_TRACER
help
This tracer profiles kernel accesses on variables watched through the
ksym tracer ftrace plugin. Depending upon the hardware, all read
and write operations on kernel variables can be monitored for
accesses.
The results will be displayed in:
/debugfs/tracing/profile_ksym
Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
depends on HAVE_FUNCTION_TRACER
@@ -371,26 +323,6 @@ config STACK_TRACER
Say N if unsure.
config KMEMTRACE
bool "Trace SLAB allocations"
select GENERIC_TRACER
help
kmemtrace provides tracing for slab allocator functions, such as
kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
data is then fed to the userspace application in order to analyse
allocation hotspots, internal fragmentation and so on, making it
possible to see how well an allocator performs, as well as debug
and profile kernel code.
This requires an userspace application to use. See
Documentation/trace/kmemtrace.txt for more information.
Saying Y will make the kernel somewhat larger and slower. However,
if you disable kmemtrace at run-time or boot-time, the performance
impact is minimal (depending on the arch the kernel is built for).
If unsure, say N.
config WORKQUEUE_TRACER
bool "Trace workqueues"
select GENERIC_TRACER


@@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_TRACING) += trace_printk.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
@@ -38,10 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
obj-$(CONFIG_NOP_TRACER) += trace_nop.o
obj-$(CONFIG_STACK_TRACER) += trace_stack.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
ifeq ($(CONFIG_BLOCK),y)
@@ -55,7 +52,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
obj-$(CONFIG_EVENT_TRACING) += power-traces.o
ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o


@@ -1883,7 +1883,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
struct hlist_head *hhd;
struct hlist_node *n;
unsigned long key;
int resched;
key = hash_long(ip, FTRACE_HASH_BITS);
@@ -1897,12 +1896,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
* period. This syncs the hash iteration and freeing of items
* on the hash. rcu_read_lock is too dangerous here.
*/
resched = ftrace_preempt_disable();
preempt_disable_notrace();
hlist_for_each_entry_rcu(entry, n, hhd, node) {
if (entry->ip == ip)
entry->ops->func(ip, parent_ip, &entry->data);
}
ftrace_preempt_enable(resched);
preempt_enable_notrace();
}
static struct ftrace_ops trace_probe_ops __read_mostly =


@@ -1,529 +0,0 @@
/*
* Memory allocator tracing
*
* Copyright (C) 2008 Eduard - Gabriel Munteanu
* Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
* Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
*/
#include <linux/tracepoint.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/kmemtrace.h>
#include "trace_output.h"
#include "trace.h"
/* Select an alternative, minimalistic output than the original one */
#define TRACE_KMEM_OPT_MINIMAL 0x1
static struct tracer_opt kmem_opts[] = {
/* Default disable the minimalistic output */
{ TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
{ }
};
static struct tracer_flags kmem_tracer_flags = {
.val = 0,
.opts = kmem_opts
};
static struct trace_array *kmemtrace_array;
/* Trace allocations */
static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
unsigned long call_site,
const void *ptr,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags,
int node)
{
struct ftrace_event_call *call = &event_kmem_alloc;
struct trace_array *tr = kmemtrace_array;
struct kmemtrace_alloc_entry *entry;
struct ring_buffer_event *event;
event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
if (!event)
return;
entry = ring_buffer_event_data(event);
tracing_generic_entry_update(&entry->ent, 0, 0);
entry->ent.type = TRACE_KMEM_ALLOC;
entry->type_id = type_id;
entry->call_site = call_site;
entry->ptr = ptr;
entry->bytes_req = bytes_req;
entry->bytes_alloc = bytes_alloc;
entry->gfp_flags = gfp_flags;
entry->node = node;
if (!filter_check_discard(call, entry, tr->buffer, event))
ring_buffer_unlock_commit(tr->buffer, event);
trace_wake_up();
}
static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
unsigned long call_site,
const void *ptr)
{
struct ftrace_event_call *call = &event_kmem_free;
struct trace_array *tr = kmemtrace_array;
struct kmemtrace_free_entry *entry;
struct ring_buffer_event *event;
event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
if (!event)
return;
entry = ring_buffer_event_data(event);
tracing_generic_entry_update(&entry->ent, 0, 0);
entry->ent.type = TRACE_KMEM_FREE;
entry->type_id = type_id;
entry->call_site = call_site;
entry->ptr = ptr;
if (!filter_check_discard(call, entry, tr->buffer, event))
ring_buffer_unlock_commit(tr->buffer, event);
trace_wake_up();
}
static void kmemtrace_kmalloc(void *ignore,
unsigned long call_site,
const void *ptr,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags)
{
kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
bytes_req, bytes_alloc, gfp_flags, -1);
}
static void kmemtrace_kmem_cache_alloc(void *ignore,
unsigned long call_site,
const void *ptr,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags)
{
kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
bytes_req, bytes_alloc, gfp_flags, -1);
}
static void kmemtrace_kmalloc_node(void *ignore,
unsigned long call_site,
const void *ptr,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags,
int node)
{
kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
bytes_req, bytes_alloc, gfp_flags, node);
}
static void kmemtrace_kmem_cache_alloc_node(void *ignore,
unsigned long call_site,
const void *ptr,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags,
int node)
{
kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
bytes_req, bytes_alloc, gfp_flags, node);
}
static void
kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
{
kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
}
static void kmemtrace_kmem_cache_free(void *ignore,
unsigned long call_site, const void *ptr)
{
kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
}
static int kmemtrace_start_probes(void)
{
int err;
err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
if (err)
return err;
err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
if (err)
return err;
err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
if (err)
return err;
err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
if (err)
return err;
err = register_trace_kfree(kmemtrace_kfree, NULL);
if (err)
return err;
err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
return err;
}
static void kmemtrace_stop_probes(void)
{
unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
unregister_trace_kfree(kmemtrace_kfree, NULL);
unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
}
static int kmem_trace_init(struct trace_array *tr)
{
kmemtrace_array = tr;
tracing_reset_online_cpus(tr);
kmemtrace_start_probes();
return 0;
}
static void kmem_trace_reset(struct trace_array *tr)
{
kmemtrace_stop_probes();
}
static void kmemtrace_headers(struct seq_file *s)
{
/* Don't need headers for the original kmemtrace output */
if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
return;
seq_printf(s, "#\n");
seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
" POINTER NODE CALLER\n");
seq_printf(s, "# FREE | | | | "
" | | | |\n");
seq_printf(s, "# |\n\n");
}
/*
* The following functions give the original output from kmemtrace,
* plus the origin CPU, since reordering occurs in-kernel now.
*/
#define KMEMTRACE_USER_ALLOC 0
#define KMEMTRACE_USER_FREE 1
struct kmemtrace_user_event {
u8 event_id;
u8 type_id;
u16 event_size;
u32 cpu;
u64 timestamp;
unsigned long call_site;
unsigned long ptr;
};
struct kmemtrace_user_event_alloc {
size_t bytes_req;
size_t bytes_alloc;
unsigned gfp_flags;
int node;
};
static enum print_line_t
kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
struct trace_seq *s = &iter->seq;
struct kmemtrace_alloc_entry *entry;
int ret;
trace_assign_type(entry, iter->ent);
ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
"bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
(unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
(unsigned long)entry->gfp_flags, entry->node);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
static enum print_line_t
kmemtrace_print_free(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
struct trace_seq *s = &iter->seq;
struct kmemtrace_free_entry *entry;
int ret;
trace_assign_type(entry, iter->ent);
ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
entry->type_id, (void *)entry->call_site,
(unsigned long)entry->ptr);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
static enum print_line_t
kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
struct trace_seq *s = &iter->seq;
struct kmemtrace_alloc_entry *entry;
struct kmemtrace_user_event *ev;
struct kmemtrace_user_event_alloc *ev_alloc;
trace_assign_type(entry, iter->ent);
ev = trace_seq_reserve(s, sizeof(*ev));
if (!ev)
return TRACE_TYPE_PARTIAL_LINE;
ev->event_id = KMEMTRACE_USER_ALLOC;
ev->type_id = entry->type_id;
ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
ev->cpu = iter->cpu;
ev->timestamp = iter->ts;
ev->call_site = entry->call_site;
ev->ptr = (unsigned long)entry->ptr;
ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
if (!ev_alloc)
return TRACE_TYPE_PARTIAL_LINE;
ev_alloc->bytes_req = entry->bytes_req;
ev_alloc->bytes_alloc = entry->bytes_alloc;
ev_alloc->gfp_flags = entry->gfp_flags;
ev_alloc->node = entry->node;
return TRACE_TYPE_HANDLED;
}
static enum print_line_t
kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
struct trace_seq *s = &iter->seq;
struct kmemtrace_free_entry *entry;
struct kmemtrace_user_event *ev;
trace_assign_type(entry, iter->ent);
ev = trace_seq_reserve(s, sizeof(*ev));
if (!ev)
return TRACE_TYPE_PARTIAL_LINE;
ev->event_id = KMEMTRACE_USER_FREE;
ev->type_id = entry->type_id;
ev->event_size = sizeof(*ev);
ev->cpu = iter->cpu;
ev->timestamp = iter->ts;
ev->call_site = entry->call_site;
ev->ptr = (unsigned long)entry->ptr;
return TRACE_TYPE_HANDLED;
}
/* The two other following provide a more minimalistic output */
static enum print_line_t
kmemtrace_print_alloc_compress(struct trace_iterator *iter)
{
struct kmemtrace_alloc_entry *entry;
struct trace_seq *s = &iter->seq;
int ret;
trace_assign_type(entry, iter->ent);
/* Alloc entry */
ret = trace_seq_printf(s, " + ");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
/* Type */
switch (entry->type_id) {
case KMEMTRACE_TYPE_KMALLOC:
ret = trace_seq_printf(s, "K ");
break;
case KMEMTRACE_TYPE_CACHE:
ret = trace_seq_printf(s, "C ");
break;
case KMEMTRACE_TYPE_PAGES:
ret = trace_seq_printf(s, "P ");
break;
default:
ret = trace_seq_printf(s, "? ");
}
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
/* Requested */
ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
/* Allocated */
ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
/* Flags
* TODO: would be better to see the name of the GFP flag names
*/
ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
/* Pointer to allocated */
ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
/* Node and call site*/
ret = trace_seq_printf(s, "%4d %pf\n", entry->node,
(void *)entry->call_site);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
static enum print_line_t
kmemtrace_print_free_compress(struct trace_iterator *iter)
{
struct kmemtrace_free_entry *entry;
struct trace_seq *s = &iter->seq;
int ret;
trace_assign_type(entry, iter->ent);
/* Free entry */
ret = trace_seq_printf(s, " - ");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
/* Type */
switch (entry->type_id) {
case KMEMTRACE_TYPE_KMALLOC:
ret = trace_seq_printf(s, "K ");
break;
case KMEMTRACE_TYPE_CACHE:
ret = trace_seq_printf(s, "C ");
break;
case KMEMTRACE_TYPE_PAGES:
ret = trace_seq_printf(s, "P ");
break;
default:
ret = trace_seq_printf(s, "? ");
}
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
/* Skip requested/allocated/flags */
ret = trace_seq_printf(s, " ");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
/* Pointer to allocated */
ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
/* Skip node and print call site*/
ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
return TRACE_TYPE_UNHANDLED;
switch (entry->type) {
case TRACE_KMEM_ALLOC:
return kmemtrace_print_alloc_compress(iter);
case TRACE_KMEM_FREE:
return kmemtrace_print_free_compress(iter);
default:
return TRACE_TYPE_UNHANDLED;
}
}
static struct trace_event_functions kmem_trace_alloc_funcs = {
.trace = kmemtrace_print_alloc,
.binary = kmemtrace_print_alloc_user,
};
static struct trace_event kmem_trace_alloc = {
.type = TRACE_KMEM_ALLOC,
.funcs = &kmem_trace_alloc_funcs,
};
static struct trace_event_functions kmem_trace_free_funcs = {
.trace = kmemtrace_print_free,
.binary = kmemtrace_print_free_user,
};
static struct trace_event kmem_trace_free = {
.type = TRACE_KMEM_FREE,
.funcs = &kmem_trace_free_funcs,
};
static struct tracer kmem_tracer __read_mostly = {
.name = "kmemtrace",
.init = kmem_trace_init,
.reset = kmem_trace_reset,
.print_line = kmemtrace_print_line,
.print_header = kmemtrace_headers,
.flags = &kmem_tracer_flags
};
void kmemtrace_init(void)
{
/* earliest opportunity to start kmem tracing */
}
static int __init init_kmem_tracer(void)
{
if (!register_ftrace_event(&kmem_trace_alloc)) {
pr_warning("Warning: could not register kmem events\n");
return 1;
}
if (!register_ftrace_event(&kmem_trace_free)) {
pr_warning("Warning: could not register kmem events\n");
return 1;
}
if (register_tracer(&kmem_tracer) != 0) {
pr_warning("Warning: could not register the kmem tracer\n");
return 1;
}
return 0;
}
device_initcall(init_kmem_tracer);


@@ -443,6 +443,7 @@ int ring_buffer_print_page_header(struct trace_seq *s)
*/
struct ring_buffer_per_cpu {
int cpu;
atomic_t record_disabled;
struct ring_buffer *buffer;
spinlock_t reader_lock; /* serialize readers */
arch_spinlock_t lock;
@@ -462,7 +463,6 @@ struct ring_buffer_per_cpu {
unsigned long read;
u64 write_stamp;
u64 read_stamp;
atomic_t record_disabled;
};
struct ring_buffer {
@@ -2242,8 +2242,6 @@ static void trace_recursive_unlock(void)
#endif
static DEFINE_PER_CPU(int, rb_need_resched);
/**
* ring_buffer_lock_reserve - reserve a part of the buffer
* @buffer: the ring buffer to reserve from
@@ -2264,13 +2262,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
int cpu, resched;
int cpu;
if (ring_buffer_flags != RB_BUFFERS_ON)
return NULL;
/* If we are tracing schedule, we don't want to recurse */
resched = ftrace_preempt_disable();
preempt_disable_notrace();
if (atomic_read(&buffer->record_disabled))
goto out_nocheck;
@@ -2295,21 +2293,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
if (!event)
goto out;
/*
* Need to store resched state on this cpu.
* Only the first needs to.
*/
if (preempt_count() == 1)
per_cpu(rb_need_resched, cpu) = resched;
return event;
out:
trace_recursive_unlock();
out_nocheck:
ftrace_preempt_enable(resched);
preempt_enable_notrace();
return NULL;
}
EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2355,13 +2345,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
trace_recursive_unlock();
/*
* Only the last preempt count needs to restore preemption.
*/
if (preempt_count() == 1)
ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
else
preempt_enable_no_resched_notrace();
preempt_enable_notrace();
return 0;
}
@@ -2469,13 +2453,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
trace_recursive_unlock();
/*
* Only the last preempt count needs to restore preemption.
*/
if (preempt_count() == 1)
ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
else
preempt_enable_no_resched_notrace();
preempt_enable_notrace();
}
EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2501,12 +2479,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
struct ring_buffer_event *event;
void *body;
int ret = -EBUSY;
int cpu, resched;
int cpu;
if (ring_buffer_flags != RB_BUFFERS_ON)
return -EBUSY;
resched = ftrace_preempt_disable();
preempt_disable_notrace();
if (atomic_read(&buffer->record_disabled))
goto out;
@@ -2536,7 +2514,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
ret = 0;
out:
ftrace_preempt_enable(resched);
preempt_enable_notrace();
return ret;
}


@@ -341,7 +341,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
TRACE_ITER_GRAPH_TIME;
TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
static int trace_stop_count;
static DEFINE_SPINLOCK(tracing_start_lock);
@@ -425,6 +425,7 @@ static const char *trace_options[] = {
"latency-format",
"sleep-time",
"graph-time",
"record-cmd",
NULL
};
@@ -656,6 +657,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
WARN_ON_ONCE(!irqs_disabled());
if (!current_trace->use_max_tr) {
WARN_ON_ONCE(1);
return;
}
arch_spin_lock(&ftrace_max_lock);
tr->buffer = max_tr.buffer;
@@ -682,6 +687,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
WARN_ON_ONCE(!irqs_disabled());
if (!current_trace->use_max_tr) {
WARN_ON_ONCE(1);
return;
}
arch_spin_lock(&ftrace_max_lock);
ftrace_disable_cpu();
@@ -726,7 +736,7 @@ __acquires(kernel_lock)
return -1;
}
if (strlen(type->name) > MAX_TRACER_SIZE) {
if (strlen(type->name) >= MAX_TRACER_SIZE) {
pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
return -1;
}
@@ -1328,61 +1338,6 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
#endif /* CONFIG_STACKTRACE */
static void
ftrace_trace_special(void *__tr,
unsigned long arg1, unsigned long arg2, unsigned long arg3,
int pc)
{
struct ftrace_event_call *call = &event_special;
struct ring_buffer_event *event;
struct trace_array *tr = __tr;
struct ring_buffer *buffer = tr->buffer;
struct special_entry *entry;
event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
sizeof(*entry), 0, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
entry->arg1 = arg1;
entry->arg2 = arg2;
entry->arg3 = arg3;
if (!filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, 0, pc);
}
void
__trace_special(void *__tr, void *__data,
unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
}
void
ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
unsigned long flags;
int cpu;
int pc;
if (tracing_disabled)
return;
pc = preempt_count();
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
if (likely(atomic_inc_return(&data->disabled) == 1))
ftrace_trace_special(tr, arg1, arg2, arg3, pc);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
/**
* trace_vbprintk - write binary msg to tracing buffer
*
@@ -1401,7 +1356,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
struct bprint_entry *entry;
unsigned long flags;
int disable;
int resched;
int cpu, len = 0, size, pc;
if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1411,7 +1365,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
pause_graph_tracing();
pc = preempt_count();
resched = ftrace_preempt_disable();
preempt_disable_notrace();
cpu = raw_smp_processor_id();
data = tr->data[cpu];
@@ -1449,7 +1403,7 @@ out_unlock:
out:
atomic_dec_return(&data->disabled);
ftrace_preempt_enable(resched);
preempt_enable_notrace();
unpause_graph_tracing();
return len;
@@ -2386,6 +2340,7 @@ static const struct file_operations show_traces_fops = {
.open = show_traces_open,
.read = seq_read,
.release = seq_release,
.llseek = seq_lseek,
};
/*
@@ -2479,6 +2434,7 @@ static const struct file_operations tracing_cpumask_fops = {
.open = tracing_open_generic,
.read = tracing_cpumask_read,
.write = tracing_cpumask_write,
.llseek = generic_file_llseek,
};
static int tracing_trace_options_show(struct seq_file *m, void *v)
@@ -2554,6 +2510,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
trace_flags |= mask;
else
trace_flags &= ~mask;
if (mask == TRACE_ITER_RECORD_CMD)
trace_event_enable_cmd_record(enabled);
}
static ssize_t
@@ -2645,6 +2604,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf,
static const struct file_operations tracing_readme_fops = {
.open = tracing_open_generic,
.read = tracing_readme_read,
.llseek = generic_file_llseek,
};
static ssize_t
@@ -2695,6 +2655,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
static const struct file_operations tracing_saved_cmdlines_fops = {
.open = tracing_open_generic,
.read = tracing_saved_cmdlines_read,
.llseek = generic_file_llseek,
};
static ssize_t
@@ -2790,6 +2751,9 @@ static int tracing_resize_ring_buffer(unsigned long size)
if (ret < 0)
return ret;
if (!current_trace->use_max_tr)
goto out;
ret = ring_buffer_resize(max_tr.buffer, size);
if (ret < 0) {
int r;
@@ -2817,11 +2781,14 @@ static int tracing_resize_ring_buffer(unsigned long size)
return ret;
}
max_tr.entries = size;
out:
global_trace.entries = size;
return ret;
}
/**
* tracing_update_buffers - used by tracing facility to expand ring buffers
*
@@ -2882,12 +2849,26 @@ static int tracing_set_tracer(const char *buf)
trace_branch_disable();
if (current_trace && current_trace->reset)
current_trace->reset(tr);
if (current_trace && current_trace->use_max_tr) {
/*
* We don't free the ring buffer. instead, resize it because
* The max_tr ring buffer has some state (e.g. ring->clock) and
* we want preserve it.
*/
ring_buffer_resize(max_tr.buffer, 1);
max_tr.entries = 1;
}
destroy_trace_option_files(topts);
current_trace = t;
topts = create_trace_option_files(current_trace);
if (current_trace->use_max_tr) {
ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
if (ret < 0)
goto out;
max_tr.entries = global_trace.entries;
}
if (t->init) {
ret = tracer_init(t, tr);
@@ -3024,6 +3005,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
if (iter->trace->pipe_open)
iter->trace->pipe_open(iter);
nonseekable_open(inode, filp);
out:
mutex_unlock(&trace_types_lock);
return ret;
@@ -3469,7 +3451,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
}
tracing_start();
max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
return cnt;
@@ -3582,18 +3563,21 @@ static const struct file_operations tracing_max_lat_fops = {
.open = tracing_open_generic,
.read = tracing_max_lat_read,
.write = tracing_max_lat_write,
.llseek = generic_file_llseek,
};
static const struct file_operations tracing_ctrl_fops = {
.open = tracing_open_generic,
.read = tracing_ctrl_read,
.write = tracing_ctrl_write,
.llseek = generic_file_llseek,
};
static const struct file_operations set_tracer_fops = {
.open = tracing_open_generic,
.read = tracing_set_trace_read,
.write = tracing_set_trace_write,
.llseek = generic_file_llseek,
};
static const struct file_operations tracing_pipe_fops = {
@@ -3602,17 +3586,20 @@ static const struct file_operations tracing_pipe_fops = {
.read = tracing_read_pipe,
.splice_read = tracing_splice_read_pipe,
.release = tracing_release_pipe,
.llseek = no_llseek,
};
static const struct file_operations tracing_entries_fops = {
.open = tracing_open_generic,
.read = tracing_entries_read,
.write = tracing_entries_write,
.llseek = generic_file_llseek,
};
static const struct file_operations tracing_mark_fops = {
.open = tracing_open_generic,
.write = tracing_mark_write,
.llseek = generic_file_llseek,
};
static const struct file_operations trace_clock_fops = {
@@ -3918,6 +3905,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
static const struct file_operations tracing_stats_fops = {
.open = tracing_open_generic,
.read = tracing_stats_read,
.llseek = generic_file_llseek,
};
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -3954,6 +3942,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
static const struct file_operations tracing_dyn_info_fops = {
.open = tracing_open_generic,
.read = tracing_read_dyn_info,
.llseek = generic_file_llseek,
};
#endif
@@ -4107,6 +4096,7 @@ static const struct file_operations trace_options_fops = {
.open = tracing_open_generic,
.read = trace_options_read,
.write = trace_options_write,
.llseek = generic_file_llseek,
};
static ssize_t
@@ -4158,6 +4148,7 @@ static const struct file_operations trace_options_core_fops = {
.open = tracing_open_generic,
.read = trace_options_core_read,
.write = trace_options_core_write,
.llseek = generic_file_llseek,
};
struct dentry *trace_create_file(const char *name,
@@ -4347,9 +4338,6 @@ static __init int tracer_init_debugfs(void)
trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
#endif
#ifdef CONFIG_SYSPROF_TRACER
init_tracer_sysprof_debugfs(d_tracer);
#endif
create_trace_options_dir();
@@ -4576,16 +4564,14 @@ __init static int tracer_alloc_buffers(void)
#ifdef CONFIG_TRACER_MAX_TRACE
max_tr.buffer = ring_buffer_alloc(ring_buf_size,
TRACE_BUFFER_FLAGS);
max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS);
if (!max_tr.buffer) {
printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
WARN_ON(1);
ring_buffer_free(global_trace.buffer);
goto out_free_cpumask;
}
max_tr.entries = ring_buffer_size(max_tr.buffer);
WARN_ON(max_tr.entries != global_trace.entries);
max_tr.entries = 1;
#endif
/* Allocate the first page for all buffers */
@@ -4598,9 +4584,6 @@ __init static int tracer_alloc_buffers(void)
register_tracer(&nop_trace);
current_trace = &nop_trace;
#ifdef CONFIG_BOOT_TRACER
register_tracer(&boot_tracer);
#endif
/* All seems OK, enable tracing */
tracing_disabled = 0;


@@ -9,10 +9,7 @@
#include <linux/mmiotrace.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
#include <trace/boot.h>
#include <linux/kmemtrace.h>
#include <linux/hw_breakpoint.h>
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
@@ -25,30 +22,17 @@ enum trace_type {
TRACE_STACK,
TRACE_PRINT,
TRACE_BPRINT,
TRACE_SPECIAL,
TRACE_MMIO_RW,
TRACE_MMIO_MAP,
TRACE_BRANCH,
TRACE_BOOT_CALL,
TRACE_BOOT_RET,
TRACE_GRAPH_RET,
TRACE_GRAPH_ENT,
TRACE_USER_STACK,
TRACE_KMEM_ALLOC,
TRACE_KMEM_FREE,
TRACE_BLK,
TRACE_KSYM,
__TRACE_LAST_TYPE,
};
enum kmemtrace_type_id {
KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
};
extern struct tracer boot_tracer;
#undef __field
#define __field(type, item) type item;
@@ -204,23 +188,15 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
IF_ASSIGN(var, ent, struct special_entry, 0); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
TRACE_MMIO_RW); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
TRACE_MMIO_MAP); \
IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
TRACE_GRAPH_ENT); \
IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
TRACE_GRAPH_RET); \
IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
TRACE_KMEM_FREE); \
IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
__ftrace_bad_type(); \
} while (0)
@@ -298,6 +274,7 @@ struct tracer {
struct tracer *next;
int print_max;
struct tracer_flags *flags;
int use_max_tr;
};
@@ -318,7 +295,6 @@ struct dentry *trace_create_file(const char *name,
const struct file_operations *fops);
struct dentry *tracing_init_dentry(void);
void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
struct ring_buffer_event;
@@ -363,11 +339,6 @@ void tracing_sched_wakeup_trace(struct trace_array *tr,
struct task_struct *wakee,
struct task_struct *cur,
unsigned long flags, int pc);
void trace_special(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long arg1,
unsigned long arg2,
unsigned long arg3, int pc);
void trace_function(struct trace_array *tr,
unsigned long ip,
unsigned long parent_ip,
@@ -398,8 +369,6 @@ extern cpumask_var_t __read_mostly tracing_buffer_mask;
#define for_each_tracing_cpu(cpu) \
for_each_cpu(cpu, tracing_buffer_mask)
extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
extern unsigned long tracing_thresh;
@@ -469,12 +438,8 @@ extern int trace_selftest_startup_nop(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_sched_switch(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_sysprof(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_ksym(struct tracer *trace,
struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
@@ -636,6 +601,7 @@ enum trace_iterator_flags {
TRACE_ITER_LATENCY_FMT = 0x20000,
TRACE_ITER_SLEEP_TIME = 0x40000,
TRACE_ITER_GRAPH_TIME = 0x80000,
TRACE_ITER_RECORD_CMD = 0x100000,
};
/*
@@ -647,54 +613,6 @@ enum trace_iterator_flags {
extern struct tracer nop_trace;
/**
* ftrace_preempt_disable - disable preemption scheduler safe
*
* When tracing can happen inside the scheduler, there exists
* cases that the tracing might happen before the need_resched
* flag is checked. If this happens and the tracer calls
* preempt_enable (after a disable), a schedule might take place
* causing an infinite recursion.
*
* To prevent this, we read the need_resched flag before
* disabling preemption. When we want to enable preemption we
* check the flag, if it is set, then we call preempt_enable_no_resched.
* Otherwise, we call preempt_enable.
*
* The rational for doing the above is that if need_resched is set
* and we have yet to reschedule, we are either in an atomic location
* (where we do not need to check for scheduling) or we are inside
* the scheduler and do not want to resched.
*/
static inline int ftrace_preempt_disable(void)
{
int resched;
resched = need_resched();
preempt_disable_notrace();
return resched;
}
/**
* ftrace_preempt_enable - enable preemption scheduler safe
* @resched: the return value from ftrace_preempt_disable
*
* This is a scheduler safe way to enable preemption and not miss
* any preemption checks. The disabled saved the state of preemption.
* If resched is set, then we are either inside an atomic or
* are inside the scheduler (we would have already scheduled
* otherwise). In this case, we do not want to call normal
* preempt_enable, but preempt_enable_no_resched instead.
*/
static inline void ftrace_preempt_enable(int resched)
{
if (resched)
preempt_enable_no_resched_notrace();
else
preempt_enable_notrace();
}
#ifdef CONFIG_BRANCH_TRACER
extern int enable_branch_tracing(struct trace_array *tr);
extern void disable_branch_tracing(void);
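With preempt_schedule() now notrace (see the sched.c hunk above), tracers can use plain preempt_disable_notrace()/preempt_enable_notrace() pairs, and the need_resched bookkeeping these deleted helpers performed becomes unnecessary. A toy model of what ftrace_preempt_disable()/ftrace_preempt_enable() used to guard against, with stand-in globals instead of the real preempt count and need_resched flag:

#include <stdio.h>
#include <stdbool.h>

/* Stand-ins for the real scheduler state. */
static bool need_resched_flag;
static int preempt_count_model;

static void schedule_model(void)
{
	puts("schedule()");	/* called from a tracer, this could recurse */
}

static int ftrace_preempt_disable_model(void)
{
	bool resched = need_resched_flag;	/* sample *before* disabling */

	preempt_count_model++;
	return resched;
}

static void ftrace_preempt_enable_model(int resched)
{
	preempt_count_model--;
	/* If a reschedule was already pending on entry, we were either in
	 * an atomic region or inside the scheduler itself, so skip the
	 * resched check rather than recursing through schedule(). */
	if (!resched && need_resched_flag && preempt_count_model == 0)
		schedule_model();
}

int main(void)
{
	need_resched_flag = true;	/* a reschedule is pending */

	int r = ftrace_preempt_disable_model();
	ftrace_preempt_enable_model(r);	/* no schedule() call here */
	return 0;
}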
@@ -785,6 +703,8 @@ struct filter_pred {
int pop_n;
};
extern struct list_head ftrace_common_fields;
extern enum regex_type
filter_parse_regex(char *buff, int len, char **search, int *not);
extern void print_event_filter(struct ftrace_event_call *call,
@@ -814,6 +734,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
return 0;
}
extern void trace_event_enable_cmd_record(bool enable);
extern struct mutex event_mutex;
extern struct list_head ftrace_events;


@@ -1,185 +0,0 @@
/*
* ring buffer based initcalls tracer
*
* Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
*
*/
#include <linux/init.h>
#include <linux/debugfs.h>
#include <linux/ftrace.h>
#include <linux/kallsyms.h>
#include <linux/time.h>
#include "trace.h"
#include "trace_output.h"
static struct trace_array *boot_trace;
static bool pre_initcalls_finished;
/* Tells the boot tracer that the pre_smp_initcalls are finished.
* So we are ready .
* It doesn't enable sched events tracing however.
* You have to call enable_boot_trace to do so.
*/
void start_boot_trace(void)
{
pre_initcalls_finished = true;
}
void enable_boot_trace(void)
{
if (boot_trace && pre_initcalls_finished)
tracing_start_sched_switch_record();
}
void disable_boot_trace(void)
{
if (boot_trace && pre_initcalls_finished)
tracing_stop_sched_switch_record();
}
static int boot_trace_init(struct trace_array *tr)
{
boot_trace = tr;
if (!tr)
return 0;
tracing_reset_online_cpus(tr);
tracing_sched_switch_assign_trace(tr);
return 0;
}
static enum print_line_t
initcall_call_print_line(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
struct trace_seq *s = &iter->seq;
struct trace_boot_call *field;
struct boot_trace_call *call;
u64 ts;
unsigned long nsec_rem;
int ret;
trace_assign_type(field, entry);
call = &field->boot_call;
ts = iter->ts;
nsec_rem = do_div(ts, NSEC_PER_SEC);
ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
(unsigned long)ts, nsec_rem, call->func, call->caller);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
else
return TRACE_TYPE_HANDLED;
}
static enum print_line_t
initcall_ret_print_line(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
struct trace_seq *s = &iter->seq;
struct trace_boot_ret *field;
struct boot_trace_ret *init_ret;
u64 ts;
unsigned long nsec_rem;
int ret;
trace_assign_type(field, entry);
init_ret = &field->boot_ret;
ts = iter->ts;
nsec_rem = do_div(ts, NSEC_PER_SEC);
ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
"returned %d after %llu msecs\n",
(unsigned long) ts,
nsec_rem,
init_ret->func, init_ret->result, init_ret->duration);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
else
return TRACE_TYPE_HANDLED;
}
static enum print_line_t initcall_print_line(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
switch (entry->type) {
case TRACE_BOOT_CALL:
return initcall_call_print_line(iter);
case TRACE_BOOT_RET:
return initcall_ret_print_line(iter);
default:
return TRACE_TYPE_UNHANDLED;
}
}
struct tracer boot_tracer __read_mostly =
{
.name = "initcall",
.init = boot_trace_init,
.reset = tracing_reset_online_cpus,
.print_line = initcall_print_line,
};
void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
{
struct ftrace_event_call *call = &event_boot_call;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
struct trace_boot_call *entry;
struct trace_array *tr = boot_trace;
if (!tr || !pre_initcalls_finished)
return;
/* Get its name now since this function could
* disappear because it is in the .init section.
*/
sprint_symbol(bt->func, (unsigned long)fn);
preempt_disable();
buffer = tr->buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
sizeof(*entry), 0, 0);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
entry->boot_call = *bt;
if (!filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, 0, 0);
out:
preempt_enable();
}
void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
{
struct ftrace_event_call *call = &event_boot_ret;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
struct trace_boot_ret *entry;
struct trace_array *tr = boot_trace;
if (!tr || !pre_initcalls_finished)
return;
sprint_symbol(bt->func, (unsigned long)fn);
preempt_disable();
buffer = tr->buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
sizeof(*entry), 0, 0);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
entry->boot_ret = *bt;
if (!filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, 0, 0);
out:
preempt_enable();
}


@@ -32,16 +32,15 @@
u64 notrace trace_clock_local(void)
{
u64 clock;
int resched;
/*
* sched_clock() is an architecture implemented, fast, scalable,
* lockless clock. It is not guaranteed to be coherent across
* CPUs, nor across CPU idle events.
*/
resched = ftrace_preempt_disable();
preempt_disable_notrace();
clock = sched_clock();
ftrace_preempt_enable(resched);
preempt_enable_notrace();
return clock;
}


@@ -150,23 +150,6 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
)
);
/*
* Special (free-form) trace entry:
*/
FTRACE_ENTRY(special, special_entry,
TRACE_SPECIAL,
F_STRUCT(
__field( unsigned long, arg1 )
__field( unsigned long, arg2 )
__field( unsigned long, arg3 )
),
F_printk("(%08lx) (%08lx) (%08lx)",
__entry->arg1, __entry->arg2, __entry->arg3)
);
/*
* Stack-trace entry:
*/
@@ -271,33 +254,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
__entry->map_id, __entry->opcode)
);
FTRACE_ENTRY(boot_call, trace_boot_call,
TRACE_BOOT_CALL,
F_STRUCT(
__field_struct( struct boot_trace_call, boot_call )
__field_desc( pid_t, boot_call, caller )
__array_desc( char, boot_call, func, KSYM_SYMBOL_LEN)
),
F_printk("%d %s", __entry->caller, __entry->func)
);
FTRACE_ENTRY(boot_ret, trace_boot_ret,
TRACE_BOOT_RET,
F_STRUCT(
__field_struct( struct boot_trace_ret, boot_ret )
__array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN)
__field_desc( int, boot_ret, result )
__field_desc( unsigned long, boot_ret, duration )
),
F_printk("%s %d %lx",
__entry->func, __entry->result, __entry->duration)
);
#define TRACE_FUNC_SIZE 30
#define TRACE_FILE_SIZE 20
@@ -318,53 +274,3 @@ FTRACE_ENTRY(branch, trace_branch,
__entry->func, __entry->file, __entry->correct)
);
FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
TRACE_KMEM_ALLOC,
F_STRUCT(
__field( enum kmemtrace_type_id, type_id )
__field( unsigned long, call_site )
__field( const void *, ptr )
__field( size_t, bytes_req )
__field( size_t, bytes_alloc )
__field( gfp_t, gfp_flags )
__field( int, node )
),
F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
" flags:%x node:%d",
__entry->type_id, __entry->call_site, __entry->ptr,
__entry->bytes_req, __entry->bytes_alloc,
__entry->gfp_flags, __entry->node)
);
FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
TRACE_KMEM_FREE,
F_STRUCT(
__field( enum kmemtrace_type_id, type_id )
__field( unsigned long, call_site )
__field( const void *, ptr )
),
F_printk("type:%u call_site:%lx ptr:%p",
__entry->type_id, __entry->call_site, __entry->ptr)
);
FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
TRACE_KSYM,
F_STRUCT(
__field( unsigned long, ip )
__field( unsigned char, type )
__array( char , cmd, TASK_COMM_LEN )
__field( unsigned long, addr )
),
F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
(void *)__entry->ip, (unsigned int)__entry->type,
(void *)__entry->addr, __entry->cmd)
);


@@ -9,8 +9,6 @@
#include <linux/kprobes.h>
#include "trace.h"
EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
static char *perf_trace_buf[4];
/*
@@ -56,13 +54,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
}
}
if (tp_event->class->reg)
ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
else
ret = tracepoint_probe_register(tp_event->name,
tp_event->class->perf_probe,
tp_event);
ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
if (ret)
goto fail;
@@ -96,9 +88,7 @@ int perf_trace_init(struct perf_event *p_event)
mutex_lock(&event_mutex);
list_for_each_entry(tp_event, &ftrace_events, list) {
if (tp_event->event.type == event_id &&
tp_event->class &&
(tp_event->class->perf_probe ||
tp_event->class->reg) &&
tp_event->class && tp_event->class->reg &&
try_module_get(tp_event->mod)) {
ret = perf_trace_event_init(tp_event, p_event);
break;
@@ -138,18 +128,13 @@ void perf_trace_destroy(struct perf_event *p_event)
if (--tp_event->perf_refcount > 0)
goto out;
if (tp_event->class->reg)
tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
else
tracepoint_probe_unregister(tp_event->name,
tp_event->class->perf_probe,
tp_event);
tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
/*
* Ensure our callback won't be called anymore. See
* tracepoint_probe_unregister() and __DO_TRACE().
* Ensure our callback won't be called anymore. The buffers
* will be freed after that.
*/
synchronize_sched();
tracepoint_synchronize_unregister();
free_percpu(tp_event->perf_events);
tp_event->perf_events = NULL;


@@ -28,6 +28,7 @@
DEFINE_MUTEX(event_mutex);
LIST_HEAD(ftrace_events);
LIST_HEAD(ftrace_common_fields);
struct list_head *
trace_get_fields(struct ftrace_event_call *event_call)
@@ -37,15 +38,11 @@ trace_get_fields(struct ftrace_event_call *event_call)
return event_call->class->get_fields(event_call);
}
int trace_define_field(struct ftrace_event_call *call, const char *type,
const char *name, int offset, int size, int is_signed,
int filter_type)
static int __trace_define_field(struct list_head *head, const char *type,
const char *name, int offset, int size,
int is_signed, int filter_type)
{
struct ftrace_event_field *field;
struct list_head *head;
if (WARN_ON(!call->class))
return 0;
field = kzalloc(sizeof(*field), GFP_KERNEL);
if (!field)
@@ -68,7 +65,6 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
field->size = size;
field->is_signed = is_signed;
head = trace_get_fields(call);
list_add(&field->link, head);
return 0;
@@ -80,17 +76,32 @@ err:
return -ENOMEM;
}
int trace_define_field(struct ftrace_event_call *call, const char *type,
const char *name, int offset, int size, int is_signed,
int filter_type)
{
struct list_head *head;
if (WARN_ON(!call->class))
return 0;
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
is_signed, filter_type);
}
EXPORT_SYMBOL_GPL(trace_define_field);
#define __common_field(type, item) \
ret = trace_define_field(call, #type, "common_" #item, \
offsetof(typeof(ent), item), \
sizeof(ent.item), \
is_signed_type(type), FILTER_OTHER); \
ret = __trace_define_field(&ftrace_common_fields, #type, \
"common_" #item, \
offsetof(typeof(ent), item), \
sizeof(ent.item), \
is_signed_type(type), FILTER_OTHER); \
if (ret) \
return ret;
static int trace_define_common_fields(struct ftrace_event_call *call)
static int trace_define_common_fields(void)
{
int ret;
struct trace_entry ent;
@@ -130,6 +141,55 @@ int trace_event_raw_init(struct ftrace_event_call *call)
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);
int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
{
switch (type) {
case TRACE_REG_REGISTER:
return tracepoint_probe_register(call->name,
call->class->probe,
call);
case TRACE_REG_UNREGISTER:
tracepoint_probe_unregister(call->name,
call->class->probe,
call);
return 0;
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
return tracepoint_probe_register(call->name,
call->class->perf_probe,
call);
case TRACE_REG_PERF_UNREGISTER:
tracepoint_probe_unregister(call->name,
call->class->perf_probe,
call);
return 0;
#endif
}
return 0;
}
EXPORT_SYMBOL_GPL(ftrace_event_reg);
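/*
 * Editor's sketch (hypothetical names, not part of this patch): with the
 * common helper above, an event class simply points .reg at it, e.g.
 *
 *	static struct ftrace_event_class my_event_class = {
 *		.system	= "my_subsys",
 *		.probe	= my_probe,
 *		.reg	= ftrace_event_reg,
 *	};
 *
 * so the core can call class->reg() unconditionally instead of falling
 * back to open-coded tracepoint_probe_register() calls, as the
 * simplified checks in this series now assume.
 */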
void trace_event_enable_cmd_record(bool enable)
{
struct ftrace_event_call *call;
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
if (!(call->flags & TRACE_EVENT_FL_ENABLED))
continue;
if (enable) {
tracing_start_cmdline_record();
call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
} else {
tracing_stop_cmdline_record();
call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
}
}
mutex_unlock(&event_mutex);
}
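/*
 * Editor's note (illustrative): this helper backs the new record-cmd
 * trace option (TRACE_ITER_RECORD_CMD, used below), so saving of task
 * comms can be toggled at runtime, e.g.
 *
 *	echo 0 > /sys/kernel/debug/tracing/options/record-cmd
 *
 * assuming debugfs is mounted at the usual location. Only events that
 * are already enabled are walked here; the FL_RECORDED_CMD flag marks
 * which of them hold a cmdline-recording reference.
 */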
static int ftrace_event_enable_disable(struct ftrace_event_call *call,
int enable)
{
@@ -139,24 +199,20 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
case 0:
if (call->flags & TRACE_EVENT_FL_ENABLED) {
call->flags &= ~TRACE_EVENT_FL_ENABLED;
tracing_stop_cmdline_record();
if (call->class->reg)
call->class->reg(call, TRACE_REG_UNREGISTER);
else
tracepoint_probe_unregister(call->name,
call->class->probe,
call);
if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) {
tracing_stop_cmdline_record();
call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
}
call->class->reg(call, TRACE_REG_UNREGISTER);
}
break;
case 1:
if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
tracing_start_cmdline_record();
if (call->class->reg)
ret = call->class->reg(call, TRACE_REG_REGISTER);
else
ret = tracepoint_probe_register(call->name,
call->class->probe,
call);
if (trace_flags & TRACE_ITER_RECORD_CMD) {
tracing_start_cmdline_record();
call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
}
ret = call->class->reg(call, TRACE_REG_REGISTER);
if (ret) {
tracing_stop_cmdline_record();
pr_info("event trace: Could not enable event "
@@ -194,8 +250,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
if (!call->name || !call->class ||
(!call->class->probe && !call->class->reg))
if (!call->name || !call->class || !call->class->reg)
continue;
if (match &&
@@ -321,7 +376,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
* The ftrace subsystem is for showing formats only.
* They cannot be enabled or disabled via the event files.
*/
if (call->class && (call->class->probe || call->class->reg))
if (call->class && call->class->reg)
return call;
}
@@ -474,8 +529,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
if (!call->name || !call->class ||
(!call->class->probe && !call->class->reg))
if (!call->name || !call->class || !call->class->reg)
continue;
if (system && strcmp(call->class->system, system) != 0)
@@ -544,32 +598,10 @@ out:
return ret;
}
static ssize_t
event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
static void print_event_fields(struct trace_seq *s, struct list_head *head)
{
struct ftrace_event_call *call = filp->private_data;
struct ftrace_event_field *field;
struct list_head *head;
struct trace_seq *s;
int common_field_count = 5;
char *buf;
int r = 0;
if (*ppos)
return 0;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
trace_seq_init(s);
trace_seq_printf(s, "name: %s\n", call->name);
trace_seq_printf(s, "ID: %d\n", call->event.type);
trace_seq_printf(s, "format:\n");
head = trace_get_fields(call);
list_for_each_entry_reverse(field, head, link) {
/*
* Smartly show the array type (except for dynamic arrays).
@@ -584,29 +616,54 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
array_descriptor = NULL;
if (!array_descriptor) {
r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
"\tsize:%u;\tsigned:%d;\n",
field->type, field->name, field->offset,
field->size, !!field->is_signed);
} else {
r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
"\tsize:%u;\tsigned:%d;\n",
(int)(array_descriptor - field->type),
field->type, field->name,
array_descriptor, field->offset,
field->size, !!field->is_signed);
}
if (--common_field_count == 0)
r = trace_seq_printf(s, "\n");
if (!r)
break;
}
}
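/*
 * Editor's sketch of the resulting format file (illustrative excerpt):
 * the common fields are printed once from ftrace_common_fields, then a
 * blank line, then the event's own fields, e.g.
 *
 *	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
 *	...
 *
 *	<event-specific fields follow>
 */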
if (r)
r = trace_seq_printf(s, "\nprint fmt: %s\n",
call->print_fmt);
static ssize_t
event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
struct ftrace_event_call *call = filp->private_data;
struct list_head *head;
struct trace_seq *s;
char *buf;
int r;
if (*ppos)
return 0;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
trace_seq_init(s);
trace_seq_printf(s, "name: %s\n", call->name);
trace_seq_printf(s, "ID: %d\n", call->event.type);
trace_seq_printf(s, "format:\n");
/* print common fields */
print_event_fields(s, &ftrace_common_fields);
trace_seq_putc(s, '\n');
/* print event specific fields */
head = trace_get_fields(call);
print_event_fields(s, head);
r = trace_seq_printf(s, "\nprint fmt: %s\n", call->print_fmt);
if (!r) {
/*
@@ -963,35 +1020,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
return -1;
}
if (call->class->probe || call->class->reg)
if (call->class->reg)
trace_create_file("enable", 0644, call->dir, call,
enable);
#ifdef CONFIG_PERF_EVENTS
if (call->event.type && (call->class->perf_probe || call->class->reg))
if (call->event.type && call->class->reg)
trace_create_file("id", 0444, call->dir, call,
id);
#endif
if (call->class->define_fields) {
/*
* Other events may have the same class. Only update
* the fields if they are not already defined.
*/
head = trace_get_fields(call);
if (list_empty(head)) {
ret = trace_define_common_fields(call);
if (!ret)
ret = call->class->define_fields(call);
if (ret < 0) {
pr_warning("Could not initialize trace point"
" events/%s\n", call->name);
return ret;
}
/*
* Other events may have the same class. Only update
* the fields if they are not already defined.
*/
head = trace_get_fields(call);
if (list_empty(head)) {
ret = call->class->define_fields(call);
if (ret < 0) {
pr_warning("Could not initialize trace point"
" events/%s\n", call->name);
return ret;
}
trace_create_file("filter", 0644, call->dir, call,
filter);
}
trace_create_file("filter", 0644, call->dir, call,
filter);
trace_create_file("format", 0444, call->dir, call,
format);
@@ -999,11 +1052,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
return 0;
}
static int __trace_add_event_call(struct ftrace_event_call *call)
static int
__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
const struct file_operations *id,
const struct file_operations *enable,
const struct file_operations *filter,
const struct file_operations *format)
{
struct dentry *d_events;
int ret;
/* The linker may leave blanks */
if (!call->name)
return -EINVAL;
@@ -1011,8 +1070,8 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
ret = call->class->raw_init(call);
if (ret < 0) {
if (ret != -ENOSYS)
pr_warning("Could not initialize trace "
"events/%s\n", call->name);
pr_warning("Could not initialize trace events/%s\n",
call->name);
return ret;
}
}
@@ -1021,11 +1080,10 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
if (!d_events)
return -ENOENT;
ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
&ftrace_enable_fops, &ftrace_event_filter_fops,
&ftrace_event_format_fops);
ret = event_create_dir(call, d_events, id, enable, filter, format);
if (!ret)
list_add(&call->list, &ftrace_events);
call->mod = mod;
return ret;
}
@@ -1035,7 +1093,10 @@ int trace_add_event_call(struct ftrace_event_call *call)
{
int ret;
mutex_lock(&event_mutex);
ret = __trace_add_event_call(call);
ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
&ftrace_enable_fops,
&ftrace_event_filter_fops,
&ftrace_event_format_fops);
mutex_unlock(&event_mutex);
return ret;
}
@@ -1152,8 +1213,6 @@ static void trace_module_add_events(struct module *mod)
{
struct ftrace_module_file_ops *file_ops = NULL;
struct ftrace_event_call *call, *start, *end;
struct dentry *d_events;
int ret;
start = mod->trace_events;
end = mod->trace_events + mod->num_trace_events;
@@ -1161,38 +1220,14 @@ static void trace_module_add_events(struct module *mod)
if (start == end)
return;
d_events = event_trace_events_dir();
if (!d_events)
file_ops = trace_create_file_ops(mod);
if (!file_ops)
return;
for_each_event(call, start, end) {
/* The linker may leave blanks */
if (!call->name)
continue;
if (call->class->raw_init) {
ret = call->class->raw_init(call);
if (ret < 0) {
if (ret != -ENOSYS)
pr_warning("Could not initialize trace "
"point events/%s\n", call->name);
continue;
}
}
/*
* This module has events, create file ops for this module
* if not already done.
*/
if (!file_ops) {
file_ops = trace_create_file_ops(mod);
if (!file_ops)
return;
}
call->mod = mod;
ret = event_create_dir(call, d_events,
__trace_add_event_call(call, mod,
&file_ops->id, &file_ops->enable,
&file_ops->filter, &file_ops->format);
if (!ret)
list_add(&call->list, &ftrace_events);
}
}
@@ -1319,25 +1354,14 @@ static __init int event_trace_init(void)
trace_create_file("enable", 0644, d_events,
NULL, &ftrace_system_enable_fops);
if (trace_define_common_fields())
pr_warning("tracing: Failed to allocate common fields");
for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
/* The linker may leave blanks */
if (!call->name)
continue;
if (call->class->raw_init) {
ret = call->class->raw_init(call);
if (ret < 0) {
if (ret != -ENOSYS)
pr_warning("Could not initialize trace "
"point events/%s\n", call->name);
continue;
}
}
ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
__trace_add_event_call(call, NULL, &ftrace_event_id_fops,
&ftrace_enable_fops,
&ftrace_event_filter_fops,
&ftrace_event_format_fops);
if (!ret)
list_add(&call->list, &ftrace_events);
}
while (true) {
@@ -1524,12 +1548,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
struct ftrace_entry *entry;
unsigned long flags;
long disabled;
int resched;
int cpu;
int pc;
pc = preempt_count();
resched = ftrace_preempt_disable();
preempt_disable_notrace();
cpu = raw_smp_processor_id();
disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
@@ -1551,7 +1574,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
out:
atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
ftrace_preempt_enable(resched);
preempt_enable_notrace();
}
static struct ftrace_ops trace_ops __initdata =


@@ -497,12 +497,10 @@ void print_subsystem_event_filter(struct event_subsystem *system,
}
static struct ftrace_event_field *
find_event_field(struct ftrace_event_call *call, char *name)
__find_event_field(struct list_head *head, char *name)
{
struct ftrace_event_field *field;
struct list_head *head;
head = trace_get_fields(call);
list_for_each_entry(field, head, link) {
if (!strcmp(field->name, name))
return field;
@@ -511,6 +509,20 @@ find_event_field(struct ftrace_event_call *call, char *name)
return NULL;
}
static struct ftrace_event_field *
find_event_field(struct ftrace_event_call *call, char *name)
{
struct ftrace_event_field *field;
struct list_head *head;
field = __find_event_field(&ftrace_common_fields, name);
if (field)
return field;
head = trace_get_fields(call);
return __find_event_field(head, name);
}
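/*
 * Illustrative effect (editor's note): a filter such as
 *
 *	echo 'common_pid == 42' > events/sched/sched_switch/filter
 *
 * now resolves "common_pid" from ftrace_common_fields first, before
 * falling back to the event's own field list.
 */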
static void filter_free_pred(struct filter_pred *pred)
{
if (!pred)
@@ -627,9 +639,6 @@ static int init_subsystem_preds(struct event_subsystem *system)
int err;
list_for_each_entry(call, &ftrace_events, list) {
if (!call->class || !call->class->define_fields)
continue;
if (strcmp(call->class->system, system->name) != 0)
continue;
@@ -646,9 +655,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
struct ftrace_event_call *call;
list_for_each_entry(call, &ftrace_events, list) {
if (!call->class || !call->class->define_fields)
continue;
if (strcmp(call->class->system, system->name) != 0)
continue;
@@ -1251,9 +1257,6 @@ static int replace_system_preds(struct event_subsystem *system,
list_for_each_entry(call, &ftrace_events, list) {
struct event_filter *filter = call->filter;
if (!call->class || !call->class->define_fields)
continue;
if (strcmp(call->class->system, system->name) != 0)
continue;


@@ -125,12 +125,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
#include "trace_entries.h"
static int ftrace_raw_init_event(struct ftrace_event_call *call)
{
INIT_LIST_HEAD(&call->class->fields);
return 0;
}
#undef __entry
#define __entry REC
@@ -158,7 +152,7 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
struct ftrace_event_class event_class_ftrace_##call = { \
.system = __stringify(TRACE_SYSTEM), \
.define_fields = ftrace_define_fields_##call, \
.raw_init = ftrace_raw_init_event, \
.fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
}; \
\
struct ftrace_event_call __used \


@@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu, resched;
int cpu;
int pc;
if (unlikely(!ftrace_function_enabled))
return;
pc = preempt_count();
resched = ftrace_preempt_disable();
preempt_disable_notrace();
local_save_flags(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
@@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
trace_function(tr, ip, parent_ip, flags, pc);
atomic_dec(&data->disabled);
ftrace_preempt_enable(resched);
preempt_enable_notrace();
}
static void


@@ -641,7 +641,8 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
/* Print nsecs (we don't want to exceed 7 numbers) */
if (len < 7) {
snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem);
snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu",
nsecs_rem);
ret = trace_seq_printf(s, ".%s", nsecs_str);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;


@@ -649,6 +649,7 @@ static struct tracer irqsoff_tracer __read_mostly =
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
.use_max_tr = 1,
};
# define register_irqsoff(trace) register_tracer(&trace)
#else
@@ -681,6 +682,7 @@ static struct tracer preemptoff_tracer __read_mostly =
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
.use_max_tr = 1,
};
# define register_preemptoff(trace) register_tracer(&trace)
#else
@@ -715,6 +717,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
.use_max_tr = 1,
};
# define register_preemptirqsoff(trace) register_tracer(&trace)


@@ -30,6 +30,8 @@
#include <linux/ptrace.h>
#include <linux/perf_event.h>
#include <linux/stringify.h>
#include <linux/limits.h>
#include <linux/uaccess.h>
#include <asm/bitsperlong.h>
#include "trace.h"
@@ -38,6 +40,7 @@
#define MAX_TRACE_ARGS 128
#define MAX_ARGSTR_LEN 63
#define MAX_EVENT_NAME_LEN 64
#define MAX_STRING_SIZE PATH_MAX
#define KPROBE_EVENT_SYSTEM "kprobes"
/* Reserved field names */
@@ -58,14 +61,16 @@ const char *reserved_field_names[] = {
};
/* Printing function type */
typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *);
typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
void *);
#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
/* Printing in basic type function template */
#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
const char *name, void *data)\
const char *name, \
void *data, void *ent)\
{ \
return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
} \
@@ -80,6 +85,49 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
/* data_rloc: data relative location, compatible with u32 */
#define make_data_rloc(len, roffs) \
(((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
#define get_rloc_len(dl) ((u32)(dl) >> 16)
#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
static inline void *get_rloc_data(u32 *dl)
{
return (u8 *)dl + get_rloc_offs(*dl);
}
/* For data_loc conversion */
static inline void *get_loc_data(u32 *dl, void *ent)
{
return (u8 *)ent + get_rloc_offs(*dl);
}
/*
* Convert data_rloc to data_loc:
* data_rloc stores the offset from data_rloc itself, but data_loc
* stores the offset from event entry.
*/
#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
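/*
 * Worked example (editor's illustration): make_data_rloc(64, 8) packs
 * (64 << 16) | 8 == 0x00400008, so get_rloc_len() recovers 64 and
 * get_rloc_offs() recovers 8. convert_rloc_to_loc() simply adds the
 * rloc word's own offset from the event entry, rebasing the low 16
 * bits from "relative to this word" to "relative to the entry".
 */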
/* For defining macros, define string/string_size types */
typedef u32 string;
typedef u32 string_size;
/* Print type function for string type */
static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
const char *name,
void *data, void *ent)
{
int len = *(u32 *)data >> 16;
if (!len)
return trace_seq_printf(s, " %s=(fault)", name);
else
return trace_seq_printf(s, " %s=\"%s\"", name,
(const char *)get_loc_data(data, ent));
}
static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
/* Data fetch function type */
typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
@@ -94,32 +142,38 @@ static __kprobes void call_fetch(struct fetch_param *fprm,
return fprm->fn(regs, fprm->data, dest);
}
#define FETCH_FUNC_NAME(kind, type) fetch_##kind##_##type
#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
/*
* Define macro for basic types - we don't need to define s* types, because
* we have to care only about bitwidth at recording time.
*/
#define DEFINE_BASIC_FETCH_FUNCS(kind) \
DEFINE_FETCH_##kind(u8) \
DEFINE_FETCH_##kind(u16) \
DEFINE_FETCH_##kind(u32) \
DEFINE_FETCH_##kind(u64)
#define DEFINE_BASIC_FETCH_FUNCS(method) \
DEFINE_FETCH_##method(u8) \
DEFINE_FETCH_##method(u16) \
DEFINE_FETCH_##method(u32) \
DEFINE_FETCH_##method(u64)
#define CHECK_BASIC_FETCH_FUNCS(kind, fn) \
((FETCH_FUNC_NAME(kind, u8) == fn) || \
(FETCH_FUNC_NAME(kind, u16) == fn) || \
(FETCH_FUNC_NAME(kind, u32) == fn) || \
(FETCH_FUNC_NAME(kind, u64) == fn))
#define CHECK_FETCH_FUNCS(method, fn) \
(((FETCH_FUNC_NAME(method, u8) == fn) || \
(FETCH_FUNC_NAME(method, u16) == fn) || \
(FETCH_FUNC_NAME(method, u32) == fn) || \
(FETCH_FUNC_NAME(method, u64) == fn) || \
(FETCH_FUNC_NAME(method, string) == fn) || \
(FETCH_FUNC_NAME(method, string_size) == fn)) \
&& (fn != NULL))
/* Data fetch function templates */
#define DEFINE_FETCH_reg(type) \
static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
void *offset, void *dest) \
void *offset, void *dest) \
{ \
*(type *)dest = (type)regs_get_register(regs, \
(unsigned int)((unsigned long)offset)); \
}
DEFINE_BASIC_FETCH_FUNCS(reg)
/* No string on the register */
#define fetch_reg_string NULL
#define fetch_reg_string_size NULL
#define DEFINE_FETCH_stack(type) \
static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
@@ -129,6 +183,9 @@ static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
(unsigned int)((unsigned long)offset)); \
}
DEFINE_BASIC_FETCH_FUNCS(stack)
/* No string on the stack entry */
#define fetch_stack_string NULL
#define fetch_stack_string_size NULL
#define DEFINE_FETCH_retval(type) \
static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
@@ -137,6 +194,9 @@ static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
*(type *)dest = (type)regs_return_value(regs); \
}
DEFINE_BASIC_FETCH_FUNCS(retval)
/* No string on the retval */
#define fetch_retval_string NULL
#define fetch_retval_string_size NULL
#define DEFINE_FETCH_memory(type) \
static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
@@ -149,6 +209,62 @@ static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
*(type *)dest = retval; \
}
DEFINE_BASIC_FETCH_FUNCS(memory)
/*
* Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
* length and relative data location.
*/
static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
void *addr, void *dest)
{
long ret;
int maxlen = get_rloc_len(*(u32 *)dest);
u8 *dst = get_rloc_data(dest);
u8 *src = addr;
mm_segment_t old_fs = get_fs();
if (!maxlen)
return;
/*
 * Copy the string again here, since it may have changed
 * while we were probing.
 */
set_fs(KERNEL_DS);
pagefault_disable();
do
ret = __copy_from_user_inatomic(dst++, src++, 1);
while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
dst[-1] = '\0';
pagefault_enable();
set_fs(old_fs);
if (ret < 0) { /* Failed to fetch string */
((u8 *)get_rloc_data(dest))[0] = '\0';
*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
} else
*(u32 *)dest = make_data_rloc(src - (u8 *)addr,
get_rloc_offs(*(u32 *)dest));
}
/* Return the length of the string, including the null terminating byte */
static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
void *addr, void *dest)
{
int ret, len = 0;
u8 c;
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
pagefault_disable();
do {
ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
len++;
} while (c && ret == 0 && len < MAX_STRING_SIZE);
pagefault_enable();
set_fs(old_fs);
if (ret < 0) /* Failed to check the length */
*(u32 *)dest = 0;
else
*(u32 *)dest = len;
}
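/*
 * Usage sketch (editor's illustration; probe and argument names are
 * examples only): with string fetching in place, a kprobe argument can
 * record a null-terminated string from memory, e.g.
 *
 *	echo 'p:myprobe do_sys_open path=+0(%si):string' \
 *		> /sys/kernel/debug/tracing/kprobe_events
 *
 * The recorded field shows up as a "__data_loc char[]" in the event's
 * format file (see the fmttype table below).
 */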
/* Memory fetching by symbol */
struct symbol_cache {
@@ -203,6 +319,8 @@ static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
*(type *)dest = 0; \
}
DEFINE_BASIC_FETCH_FUNCS(symbol)
DEFINE_FETCH_symbol(string)
DEFINE_FETCH_symbol(string_size)
/* Dereference memory access function */
struct deref_fetch_param {
@@ -224,12 +342,14 @@ static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
*(type *)dest = 0; \
}
DEFINE_BASIC_FETCH_FUNCS(deref)
DEFINE_FETCH_deref(string)
DEFINE_FETCH_deref(string_size)
static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
{
if (CHECK_BASIC_FETCH_FUNCS(deref, data->orig.fn))
if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
free_deref_fetch_param(data->orig.data);
else if (CHECK_BASIC_FETCH_FUNCS(symbol, data->orig.fn))
else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
free_symbol_cache(data->orig.data);
kfree(data);
}
@@ -240,23 +360,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
#define ASSIGN_FETCH_FUNC(kind, type) \
.kind = FETCH_FUNC_NAME(kind, type)
/* Fetch types */
enum {
FETCH_MTD_reg = 0,
FETCH_MTD_stack,
FETCH_MTD_retval,
FETCH_MTD_memory,
FETCH_MTD_symbol,
FETCH_MTD_deref,
FETCH_MTD_END,
};
#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
{.name = #ptype, \
.size = sizeof(ftype), \
.is_signed = sign, \
.print = PRINT_TYPE_FUNC_NAME(ptype), \
.fmt = PRINT_TYPE_FMT_NAME(ptype), \
ASSIGN_FETCH_FUNC(reg, ftype), \
ASSIGN_FETCH_FUNC(stack, ftype), \
ASSIGN_FETCH_FUNC(retval, ftype), \
ASSIGN_FETCH_FUNC(memory, ftype), \
ASSIGN_FETCH_FUNC(symbol, ftype), \
ASSIGN_FETCH_FUNC(deref, ftype), \
#define ASSIGN_FETCH_FUNC(method, type) \
[FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
{.name = _name, \
.size = _size, \
.is_signed = sign, \
.print = PRINT_TYPE_FUNC_NAME(ptype), \
.fmt = PRINT_TYPE_FMT_NAME(ptype), \
.fmttype = _fmttype, \
.fetch = { \
ASSIGN_FETCH_FUNC(reg, ftype), \
ASSIGN_FETCH_FUNC(stack, ftype), \
ASSIGN_FETCH_FUNC(retval, ftype), \
ASSIGN_FETCH_FUNC(memory, ftype), \
ASSIGN_FETCH_FUNC(symbol, ftype), \
ASSIGN_FETCH_FUNC(deref, ftype), \
} \
}
#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
#define FETCH_TYPE_STRING 0
#define FETCH_TYPE_STRSIZE 1
/* Fetch type information table */
static const struct fetch_type {
const char *name; /* Name of type */
@@ -264,14 +404,16 @@ static const struct fetch_type {
int is_signed; /* Signed flag */
print_type_func_t print; /* Print functions */
const char *fmt; /* Format string */
const char *fmttype; /* Name in format file */
/* Fetch functions */
fetch_func_t reg;
fetch_func_t stack;
fetch_func_t retval;
fetch_func_t memory;
fetch_func_t symbol;
fetch_func_t deref;
fetch_func_t fetch[FETCH_MTD_END];
} fetch_type_table[] = {
/* Special types */
[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
sizeof(u32), 1, "__data_loc char[]"),
[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
string_size, sizeof(u32), 0, "u32"),
/* Basic types */
ASSIGN_FETCH_TYPE(u8, u8, 0),
ASSIGN_FETCH_TYPE(u16, u16, 0),
ASSIGN_FETCH_TYPE(u32, u32, 0),
@@ -302,12 +444,28 @@ static __kprobes void fetch_stack_address(struct pt_regs *regs,
*(unsigned long *)dest = kernel_stack_pointer(regs);
}
static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
fetch_func_t orig_fn)
{
int i;
if (type != &fetch_type_table[FETCH_TYPE_STRING])
return NULL; /* Only string type needs size function */
for (i = 0; i < FETCH_MTD_END; i++)
if (type->fetch[i] == orig_fn)
return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
WARN_ON(1); /* This should not happen */
return NULL;
}
/**
* Kprobe event core functions
*/
struct probe_arg {
struct fetch_param fetch;
struct fetch_param fetch_size;
unsigned int offset; /* Offset from argument entry */
const char *name; /* Name of this argument */
const char *comm; /* Command of this argument */
@@ -429,9 +587,9 @@ error:
static void free_probe_arg(struct probe_arg *arg)
{
if (CHECK_BASIC_FETCH_FUNCS(deref, arg->fetch.fn))
if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
free_deref_fetch_param(arg->fetch.data);
else if (CHECK_BASIC_FETCH_FUNCS(symbol, arg->fetch.fn))
else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
free_symbol_cache(arg->fetch.data);
kfree(arg->name);
kfree(arg->comm);
@@ -548,7 +706,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
if (strcmp(arg, "retval") == 0) {
if (is_return)
f->fn = t->retval;
f->fn = t->fetch[FETCH_MTD_retval];
else
ret = -EINVAL;
} else if (strncmp(arg, "stack", 5) == 0) {
@@ -562,7 +720,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
if (ret || param > PARAM_MAX_STACK)
ret = -EINVAL;
else {
f->fn = t->stack;
f->fn = t->fetch[FETCH_MTD_stack];
f->data = (void *)param;
}
} else
@@ -588,7 +746,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
case '%': /* named register */
ret = regs_query_register_offset(arg + 1);
if (ret >= 0) {
f->fn = t->reg;
f->fn = t->fetch[FETCH_MTD_reg];
f->data = (void *)(unsigned long)ret;
ret = 0;
}
@@ -598,7 +756,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
ret = strict_strtoul(arg + 1, 0, &param);
if (ret)
break;
f->fn = t->memory;
f->fn = t->fetch[FETCH_MTD_memory];
f->data = (void *)param;
} else {
ret = split_symbol_offset(arg + 1, &offset);
@@ -606,7 +764,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
break;
f->data = alloc_symbol_cache(arg + 1, offset);
if (f->data)
f->fn = t->symbol;
f->fn = t->fetch[FETCH_MTD_symbol];
}
break;
case '+': /* deref memory */
@@ -636,14 +794,17 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
if (ret)
kfree(dprm);
else {
f->fn = t->deref;
f->fn = t->fetch[FETCH_MTD_deref];
f->data = (void *)dprm;
}
}
break;
}
if (!ret && !f->fn)
if (!ret && !f->fn) { /* Parsed, but no fetch method found */
pr_info("%s type has no corresponding fetch method.\n",
t->name);
ret = -EINVAL;
}
return ret;
}
@@ -652,6 +813,7 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
struct probe_arg *parg, int is_return)
{
const char *t;
int ret;
if (strlen(arg) > MAX_ARGSTR_LEN) {
pr_info("Argument is too long.: %s\n", arg);
@@ -674,7 +836,13 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
}
parg->offset = tp->size;
tp->size += parg->type->size;
return __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
if (ret >= 0) {
parg->fetch_size.fn = get_fetch_size_function(parg->type,
parg->fetch.fn);
parg->fetch_size.data = parg->fetch.data;
}
return ret;
}
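/*
 * Illustrative pairing (editor's note): for an argument parsed as
 * "+0(%si):string", parg->fetch ends up as fetch_memory_string and
 * parg->fetch_size as fetch_memory_string_size, both sharing the same
 * fetch data, so the size pass and the copy pass walk the same address.
 */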
/* Return 1 if name is reserved or already used by another argument */
@@ -757,14 +925,17 @@ static int create_trace_probe(int argc, char **argv)
pr_info("Delete command needs an event name.\n");
return -EINVAL;
}
mutex_lock(&probe_lock);
tp = find_probe_event(event, group);
if (!tp) {
mutex_unlock(&probe_lock);
pr_info("Event %s/%s doesn't exist.\n", group, event);
return -ENOENT;
}
/* delete an event */
unregister_trace_probe(tp);
free_trace_probe(tp);
mutex_unlock(&probe_lock);
return 0;
}
@@ -1043,6 +1214,54 @@ static const struct file_operations kprobe_profile_ops = {
.release = seq_release,
};
/* Sum up the total data length for dynamic arrays (strings) */
static __kprobes int __get_data_size(struct trace_probe *tp,
struct pt_regs *regs)
{
int i, ret = 0;
u32 len;
for (i = 0; i < tp->nr_args; i++)
if (unlikely(tp->args[i].fetch_size.fn)) {
call_fetch(&tp->args[i].fetch_size, regs, &len);
ret += len;
}
return ret;
}
/* Store the value of each argument */
static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
struct pt_regs *regs,
u8 *data, int maxlen)
{
int i;
u32 end = tp->size;
u32 *dl; /* Data (relative) location */
for (i = 0; i < tp->nr_args; i++) {
if (unlikely(tp->args[i].fetch_size.fn)) {
/*
* First, we set the relative location and
* maximum data length to *dl
*/
dl = (u32 *)(data + tp->args[i].offset);
*dl = make_data_rloc(maxlen, end - tp->args[i].offset);
/* Then try to fetch string or dynamic array data */
call_fetch(&tp->args[i].fetch, regs, dl);
/* Reduce maximum length */
end += get_rloc_len(*dl);
maxlen -= get_rloc_len(*dl);
/* Trick here, convert data_rloc to data_loc */
*dl = convert_rloc_to_loc(*dl,
ent_size + tp->args[i].offset);
} else
/* Just fetching data normally */
call_fetch(&tp->args[i].fetch, regs,
data + tp->args[i].offset);
}
}
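/*
 * Layout sketch (editor's illustration, one fixed u32 argument plus one
 * string argument):
 *
 *	| entry header | u32 arg | data_loc word | string bytes ... |
 *
 * The fixed part ends at tp->size; the string bytes account for dsize.
 * __get_data_size() measures the dynamic tail first so the ring-buffer
 * reservation covers it; store_trace_args() then fills the fixed slots
 * and appends each string at 'end', rewriting its rloc into a data_loc.
 */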
/* Kprobe handler */
static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
@@ -1050,8 +1269,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
struct kprobe_trace_entry_head *entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
u8 *data;
int size, i, pc;
int size, dsize, pc;
unsigned long irq_flags;
struct ftrace_event_call *call = &tp->call;
@@ -1060,7 +1278,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
local_save_flags(irq_flags);
pc = preempt_count();
size = sizeof(*entry) + tp->size;
dsize = __get_data_size(tp, regs);
size = sizeof(*entry) + tp->size + dsize;
event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
size, irq_flags, pc);
@@ -1069,9 +1288,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
entry = ring_buffer_event_data(event);
entry->ip = (unsigned long)kp->addr;
data = (u8 *)&entry[1];
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -1085,15 +1302,15 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
struct kretprobe_trace_entry_head *entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
u8 *data;
int size, i, pc;
int size, pc, dsize;
unsigned long irq_flags;
struct ftrace_event_call *call = &tp->call;
local_save_flags(irq_flags);
pc = preempt_count();
size = sizeof(*entry) + tp->size;
dsize = __get_data_size(tp, regs);
size = sizeof(*entry) + tp->size + dsize;
event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
size, irq_flags, pc);
@@ -1103,9 +1320,7 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
entry = ring_buffer_event_data(event);
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
data = (u8 *)&entry[1];
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -1137,7 +1352,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
data = (u8 *)&field[1];
for (i = 0; i < tp->nr_args; i++)
if (!tp->args[i].type->print(s, tp->args[i].name,
data + tp->args[i].offset))
data + tp->args[i].offset, field))
goto partial;
if (!trace_seq_puts(s, "\n"))
@@ -1179,7 +1394,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
data = (u8 *)&field[1];
for (i = 0; i < tp->nr_args; i++)
if (!tp->args[i].type->print(s, tp->args[i].name,
data + tp->args[i].offset))
data + tp->args[i].offset, field))
goto partial;
if (!trace_seq_puts(s, "\n"))
@@ -1214,11 +1429,6 @@ static void probe_event_disable(struct ftrace_event_call *call)
}
}
static int probe_event_raw_init(struct ftrace_event_call *event_call)
{
return 0;
}
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed) \
do { \
@@ -1239,7 +1449,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
/* Set argument names as fields */
for (i = 0; i < tp->nr_args; i++) {
ret = trace_define_field(event_call, tp->args[i].type->name,
ret = trace_define_field(event_call, tp->args[i].type->fmttype,
tp->args[i].name,
sizeof(field) + tp->args[i].offset,
tp->args[i].type->size,
@@ -1261,7 +1471,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
/* Set argument names as fields */
for (i = 0; i < tp->nr_args; i++) {
ret = trace_define_field(event_call, tp->args[i].type->name,
ret = trace_define_field(event_call, tp->args[i].type->fmttype,
tp->args[i].name,
sizeof(field) + tp->args[i].offset,
tp->args[i].type->size,
@@ -1301,8 +1511,13 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
for (i = 0; i < tp->nr_args; i++) {
pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
tp->args[i].name);
if (strcmp(tp->args[i].type->name, "string") == 0)
pos += snprintf(buf + pos, LEN_OR_ZERO,
", __get_str(%s)",
tp->args[i].name);
else
pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
tp->args[i].name);
}
#undef LEN_OR_ZERO
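/*
 * Resulting print fmt (editor's illustration, one string argument named
 * "path"):
 *
 *	print fmt: "(%lx) path=\"%s\"", REC->ip, __get_str(path)
 *
 * String arguments are referenced via __get_str() instead of REC-> so
 * that the data_loc word is resolved to the string body when printed.
 */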
@@ -1339,11 +1554,11 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
struct ftrace_event_call *call = &tp->call;
struct kprobe_trace_entry_head *entry;
struct hlist_head *head;
u8 *data;
int size, __size, i;
int size, __size, dsize;
int rctx;
__size = sizeof(*entry) + tp->size;
dsize = __get_data_size(tp, regs);
__size = sizeof(*entry) + tp->size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1355,9 +1570,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
return;
entry->ip = (unsigned long)kp->addr;
data = (u8 *)&entry[1];
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
memset(&entry[1], 0, dsize);
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
@@ -1371,11 +1585,11 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
struct ftrace_event_call *call = &tp->call;
struct kretprobe_trace_entry_head *entry;
struct hlist_head *head;
u8 *data;
int size, __size, i;
int size, __size, dsize;
int rctx;
__size = sizeof(*entry) + tp->size;
dsize = __get_data_size(tp, regs);
__size = sizeof(*entry) + tp->size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1388,9 +1602,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
data = (u8 *)&entry[1];
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
@@ -1486,15 +1698,12 @@ static int register_probe_event(struct trace_probe *tp)
int ret;
/* Initialize ftrace_event_call */
INIT_LIST_HEAD(&call->class->fields);
if (probe_is_return(tp)) {
INIT_LIST_HEAD(&call->class->fields);
call->event.funcs = &kretprobe_funcs;
call->class->raw_init = probe_event_raw_init;
call->class->define_fields = kretprobe_event_define_fields;
} else {
INIT_LIST_HEAD(&call->class->fields);
call->event.funcs = &kprobe_funcs;
call->class->raw_init = probe_event_raw_init;
call->class->define_fields = kprobe_event_define_fields;
}
if (set_print_fmt(tp) < 0)


@@ -1,508 +0,0 @@
/*
* trace_ksym.c - Kernel Symbol Tracer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2009
*/
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/debugfs.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include "trace_output.h"
#include "trace.h"
#include <linux/hw_breakpoint.h>
#include <asm/hw_breakpoint.h>
#include <asm/atomic.h>
#define KSYM_TRACER_OP_LEN 3 /* rw- */
struct trace_ksym {
struct perf_event **ksym_hbp;
struct perf_event_attr attr;
#ifdef CONFIG_PROFILE_KSYM_TRACER
atomic64_t counter;
#endif
struct hlist_node ksym_hlist;
};
static struct trace_array *ksym_trace_array;
static unsigned int ksym_tracing_enabled;
static HLIST_HEAD(ksym_filter_head);
static DEFINE_MUTEX(ksym_tracer_mutex);
#ifdef CONFIG_PROFILE_KSYM_TRACER
#define MAX_UL_INT 0xffffffff
void ksym_collect_stats(unsigned long hbp_hit_addr)
{
struct hlist_node *node;
struct trace_ksym *entry;
rcu_read_lock();
hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
if (entry->attr.bp_addr == hbp_hit_addr) {
atomic64_inc(&entry->counter);
break;
}
}
rcu_read_unlock();
}
#endif /* CONFIG_PROFILE_KSYM_TRACER */
void ksym_hbp_handler(struct perf_event *hbp, int nmi,
struct perf_sample_data *data,
struct pt_regs *regs)
{
struct ring_buffer_event *event;
struct ksym_trace_entry *entry;
struct ring_buffer *buffer;
int pc;
if (!ksym_tracing_enabled)
return;
buffer = ksym_trace_array->buffer;
pc = preempt_count();
event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
sizeof(*entry), 0, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
entry->ip = instruction_pointer(regs);
entry->type = hw_breakpoint_type(hbp);
entry->addr = hw_breakpoint_addr(hbp);
strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
#ifdef CONFIG_PROFILE_KSYM_TRACER
ksym_collect_stats(hw_breakpoint_addr(hbp));
#endif /* CONFIG_PROFILE_KSYM_TRACER */
trace_buffer_unlock_commit(buffer, event, 0, pc);
}
/* Valid access types are represented as
*
* rw- : Set Read/Write Access Breakpoint
* -w- : Set Write Access Breakpoint
* --- : Clear Breakpoints
* --x : Set Execution Break points (Not available yet)
*
*/
static int ksym_trace_get_access_type(char *str)
{
int access = 0;
if (str[0] == 'r')
access |= HW_BREAKPOINT_R;
if (str[1] == 'w')
access |= HW_BREAKPOINT_W;
if (str[2] == 'x')
access |= HW_BREAKPOINT_X;
switch (access) {
case HW_BREAKPOINT_R:
case HW_BREAKPOINT_W:
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
return access;
default:
return -EINVAL;
}
}
/*
* There can be several possible malformed requests and we attempt to capture
* all of them. We enumerate some of the rules
* 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
* i.e. multiple ':' symbols disallowed. Possible uses are of the form
* <module>:<ksym_name>:<op>.
* 2. No delimiter symbol ':' in the input string
* 3. Spurious operator symbols or symbols not in their respective positions
* 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
* 5. Kernel symbol not a part of /proc/kallsyms
* 6. Duplicate requests
*/
static int parse_ksym_trace_str(char *input_string, char **ksymname,
unsigned long *addr)
{
int ret;
*ksymname = strsep(&input_string, ":");
*addr = kallsyms_lookup_name(*ksymname);
/* Check for malformed request: (2), (1) and (5) */
if ((!input_string) ||
(strlen(input_string) != KSYM_TRACER_OP_LEN) ||
(*addr == 0))
return -EINVAL;
ret = ksym_trace_get_access_type(input_string);
return ret;
}
int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
{
struct trace_ksym *entry;
int ret = -ENOMEM;
entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
if (!entry)
return -ENOMEM;
hw_breakpoint_init(&entry->attr);
entry->attr.bp_type = op;
entry->attr.bp_addr = addr;
entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
ksym_hbp_handler);
if (IS_ERR(entry->ksym_hbp)) {
ret = PTR_ERR(entry->ksym_hbp);
if (ret == -ENOSPC) {
printk(KERN_ERR "ksym_tracer: Maximum limit reached."
" No new requests for tracing can be accepted now.\n");
} else {
printk(KERN_INFO "ksym_tracer request failed. Try again"
" later!!\n");
}
goto err;
}
hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
return 0;
err:
kfree(entry);
return ret;
}
static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
struct trace_ksym *entry;
struct hlist_node *node;
struct trace_seq *s;
ssize_t cnt = 0;
int ret;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
trace_seq_init(s);
mutex_lock(&ksym_tracer_mutex);
hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
ret = trace_seq_printf(s, "%pS:",
(void *)(unsigned long)entry->attr.bp_addr);
if (entry->attr.bp_type == HW_BREAKPOINT_R)
ret = trace_seq_puts(s, "r--\n");
else if (entry->attr.bp_type == HW_BREAKPOINT_W)
ret = trace_seq_puts(s, "-w-\n");
else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
ret = trace_seq_puts(s, "rw-\n");
WARN_ON_ONCE(!ret);
}
cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
mutex_unlock(&ksym_tracer_mutex);
kfree(s);
return cnt;
}
static void __ksym_trace_reset(void)
{
struct trace_ksym *entry;
struct hlist_node *node, *node1;
mutex_lock(&ksym_tracer_mutex);
hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
ksym_hlist) {
unregister_wide_hw_breakpoint(entry->ksym_hbp);
hlist_del_rcu(&(entry->ksym_hlist));
synchronize_rcu();
kfree(entry);
}
mutex_unlock(&ksym_tracer_mutex);
}
static ssize_t ksym_trace_filter_write(struct file *file,
const char __user *buffer,
size_t count, loff_t *ppos)
{
struct trace_ksym *entry;
struct hlist_node *node;
char *buf, *input_string, *ksymname = NULL;
unsigned long ksym_addr = 0;
int ret, op, changed = 0;
buf = kzalloc(count + 1, GFP_KERNEL);
if (!buf)
return -ENOMEM;
ret = -EFAULT;
if (copy_from_user(buf, buffer, count))
goto out;
buf[count] = '\0';
input_string = strstrip(buf);
/*
* Clear all breakpoints if:
* 1: echo > ksym_trace_filter
* 2: echo 0 > ksym_trace_filter
* 3: echo "*:---" > ksym_trace_filter
*/
if (!input_string[0] || !strcmp(input_string, "0") ||
!strcmp(input_string, "*:---")) {
__ksym_trace_reset();
ret = 0;
goto out;
}
ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
if (ret < 0)
goto out;
mutex_lock(&ksym_tracer_mutex);
ret = -EINVAL;
hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
if (entry->attr.bp_addr == ksym_addr) {
/* Check for malformed request: (6) */
if (entry->attr.bp_type != op)
changed = 1;
else
goto out_unlock;
break;
}
}
if (changed) {
unregister_wide_hw_breakpoint(entry->ksym_hbp);
entry->attr.bp_type = op;
ret = 0;
if (op > 0) {
entry->ksym_hbp =
register_wide_hw_breakpoint(&entry->attr,
ksym_hbp_handler);
if (IS_ERR(entry->ksym_hbp))
ret = PTR_ERR(entry->ksym_hbp);
else
goto out_unlock;
}
/* Error or "symbol:---" case: drop it */
hlist_del_rcu(&(entry->ksym_hlist));
synchronize_rcu();
kfree(entry);
goto out_unlock;
} else {
/* Check for malformed request: (4) */
if (op)
ret = process_new_ksym_entry(ksymname, op, ksym_addr);
}
out_unlock:
mutex_unlock(&ksym_tracer_mutex);
out:
kfree(buf);
return !ret ? count : ret;
}
static const struct file_operations ksym_tracing_fops = {
.open = tracing_open_generic,
.read = ksym_trace_filter_read,
.write = ksym_trace_filter_write,
};
static void ksym_trace_reset(struct trace_array *tr)
{
ksym_tracing_enabled = 0;
__ksym_trace_reset();
}
static int ksym_trace_init(struct trace_array *tr)
{
int cpu, ret = 0;
for_each_online_cpu(cpu)
tracing_reset(tr, cpu);
ksym_tracing_enabled = 1;
ksym_trace_array = tr;
return ret;
}
static void ksym_trace_print_header(struct seq_file *m)
{
seq_puts(m,
"# TASK-PID CPU# Symbol "
"Type Function\n");
seq_puts(m,
"# | | | "
" | |\n");
}
static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
struct trace_seq *s = &iter->seq;
struct ksym_trace_entry *field;
char str[KSYM_SYMBOL_LEN];
int ret;
if (entry->type != TRACE_KSYM)
return TRACE_TYPE_UNHANDLED;
trace_assign_type(field, entry);
ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
entry->pid, iter->cpu, (char *)field->addr);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
switch (field->type) {
case HW_BREAKPOINT_R:
ret = trace_seq_printf(s, " R ");
break;
case HW_BREAKPOINT_W:
ret = trace_seq_printf(s, " W ");
break;
case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
ret = trace_seq_printf(s, " RW ");
break;
default:
return TRACE_TYPE_PARTIAL_LINE;
}
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
sprint_symbol(str, field->ip);
ret = trace_seq_printf(s, "%s\n", str);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
struct tracer ksym_tracer __read_mostly =
{
.name = "ksym_tracer",
.init = ksym_trace_init,
.reset = ksym_trace_reset,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_ksym,
#endif
.print_header = ksym_trace_print_header,
.print_line = ksym_trace_output
};
#ifdef CONFIG_PROFILE_KSYM_TRACER
static int ksym_profile_show(struct seq_file *m, void *v)
{
struct hlist_node *node;
struct trace_ksym *entry;
int access_type = 0;
char fn_name[KSYM_NAME_LEN];
seq_puts(m, " Access Type ");
seq_puts(m, " Symbol Counter\n");
seq_puts(m, " ----------- ");
seq_puts(m, " ------ -------\n");
rcu_read_lock();
hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
access_type = entry->attr.bp_type;
switch (access_type) {
case HW_BREAKPOINT_R:
seq_puts(m, " R ");
break;
case HW_BREAKPOINT_W:
seq_puts(m, " W ");
break;
case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
seq_puts(m, " RW ");
break;
default:
seq_puts(m, " NA ");
}
if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
seq_printf(m, " %-36s", fn_name);
else
seq_printf(m, " %-36s", "<NA>");
seq_printf(m, " %15llu\n",
(unsigned long long)atomic64_read(&entry->counter));
}
rcu_read_unlock();
return 0;
}
static int ksym_profile_open(struct inode *node, struct file *file)
{
return single_open(file, ksym_profile_show, NULL);
}
static const struct file_operations ksym_profile_fops = {
.open = ksym_profile_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#endif /* CONFIG_PROFILE_KSYM_TRACER */
__init static int init_ksym_trace(void)
{
struct dentry *d_tracer;
d_tracer = tracing_init_dentry();
trace_create_file("ksym_trace_filter", 0644, d_tracer,
NULL, &ksym_tracing_fops);
#ifdef CONFIG_PROFILE_KSYM_TRACER
trace_create_file("ksym_profile", 0444, d_tracer,
NULL, &ksym_profile_fops);
#endif
return register_tracer(&ksym_tracer);
}
device_initcall(init_ksym_trace);


@@ -16,9 +16,6 @@
DECLARE_RWSEM(trace_event_mutex);
DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
static int next_event_type = __TRACE_LAST_TYPE + 1;
@@ -1069,65 +1066,6 @@ static struct trace_event trace_wake_event = {
.funcs = &trace_wake_funcs,
};
/* TRACE_SPECIAL */
static enum print_line_t trace_special_print(struct trace_iterator *iter,
int flags, struct trace_event *event)
{
struct special_entry *field;
trace_assign_type(field, iter->ent);
if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
field->arg1,
field->arg2,
field->arg3))
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
static enum print_line_t trace_special_hex(struct trace_iterator *iter,
int flags, struct trace_event *event)
{
struct special_entry *field;
struct trace_seq *s = &iter->seq;
trace_assign_type(field, iter->ent);
SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
return TRACE_TYPE_HANDLED;
}
static enum print_line_t trace_special_bin(struct trace_iterator *iter,
int flags, struct trace_event *event)
{
struct special_entry *field;
struct trace_seq *s = &iter->seq;
trace_assign_type(field, iter->ent);
SEQ_PUT_FIELD_RET(s, field->arg1);
SEQ_PUT_FIELD_RET(s, field->arg2);
SEQ_PUT_FIELD_RET(s, field->arg3);
return TRACE_TYPE_HANDLED;
}
static struct trace_event_functions trace_special_funcs = {
.trace = trace_special_print,
.raw = trace_special_print,
.hex = trace_special_hex,
.binary = trace_special_bin,
};
static struct trace_event trace_special_event = {
.type = TRACE_SPECIAL,
.funcs = &trace_special_funcs,
};
/* TRACE_STACK */
static enum print_line_t trace_stack_print(struct trace_iterator *iter,
@@ -1161,9 +1099,6 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
static struct trace_event_functions trace_stack_funcs = {
.trace = trace_stack_print,
.raw = trace_special_print,
.hex = trace_special_hex,
.binary = trace_special_bin,
};
static struct trace_event trace_stack_event = {
@@ -1194,9 +1129,6 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
static struct trace_event_functions trace_user_stack_funcs = {
.trace = trace_user_stack_print,
.raw = trace_special_print,
.hex = trace_special_hex,
.binary = trace_special_bin,
};
static struct trace_event trace_user_stack_event = {
@@ -1314,7 +1246,6 @@ static struct trace_event *events[] __initdata = {
&trace_fn_event,
&trace_ctx_event,
&trace_wake_event,
&trace_special_event,
&trace_stack_event,
&trace_user_stack_event,
&trace_bprint_event,


@@ -46,7 +46,6 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int resched;
int cpu;
int pc;
@@ -54,7 +53,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
return;
pc = preempt_count();
resched = ftrace_preempt_disable();
preempt_disable_notrace();
cpu = raw_smp_processor_id();
if (cpu != wakeup_current_cpu)
@@ -74,7 +73,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
out:
atomic_dec(&data->disabled);
out_enable:
ftrace_preempt_enable(resched);
preempt_enable_notrace();
}
static struct ftrace_ops trace_ops __read_mostly =
@@ -383,6 +382,7 @@ static struct tracer wakeup_tracer __read_mostly =
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
.use_max_tr = 1,
};
static struct tracer wakeup_rt_tracer __read_mostly =
@@ -397,6 +397,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
.use_max_tr = 1,
};
__init static int init_wakeup_tracer(void)

View File

@@ -13,11 +13,9 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_WAKE:
case TRACE_STACK:
case TRACE_PRINT:
case TRACE_SPECIAL:
case TRACE_BRANCH:
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
case TRACE_KSYM:
return 1;
}
return 0;
@@ -691,38 +689,6 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
}
#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
#ifdef CONFIG_SYSPROF_TRACER
int
trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
{
unsigned long count;
int ret;
/* start the tracing */
ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
return ret;
}
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
tracing_stop();
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
tracing_start();
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
}
return ret;
}
#endif /* CONFIG_SYSPROF_TRACER */
#ifdef CONFIG_BRANCH_TRACER
int
trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
@@ -755,56 +721,3 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
}
#endif /* CONFIG_BRANCH_TRACER */
#ifdef CONFIG_KSYM_TRACER
static int ksym_selftest_dummy;
int
trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
{
unsigned long count;
int ret;
/* start the tracing */
ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
return ret;
}
ksym_selftest_dummy = 0;
/* Register the read-write tracing request */
ret = process_new_ksym_entry("ksym_selftest_dummy",
HW_BREAKPOINT_R | HW_BREAKPOINT_W,
(unsigned long)(&ksym_selftest_dummy));
if (ret < 0) {
printk(KERN_CONT "ksym_trace read-write startup test failed\n");
goto ret_path;
}
/* Perform a read and a write operation over the dummy variable to
* trigger the tracer
*/
if (ksym_selftest_dummy == 0)
ksym_selftest_dummy++;
/* stop the tracing. */
tracing_stop();
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
tracing_start();
/* read & write operations - one each is performed on the dummy variable
* triggering two entries in the trace buffer
*/
if (!ret && count != 2) {
printk(KERN_CONT "Ksym tracer startup test failed");
ret = -1;
}
ret_path:
return ret;
}
#endif /* CONFIG_KSYM_TRACER */

View File

@@ -110,12 +110,12 @@ static inline void check_stack(void)
static void
stack_trace_call(unsigned long ip, unsigned long parent_ip)
{
int cpu, resched;
int cpu;
if (unlikely(!ftrace_enabled || stack_trace_disabled))
return;
resched = ftrace_preempt_disable();
preempt_disable_notrace();
cpu = raw_smp_processor_id();
/* no atomic needed, we only modify this variable by this cpu */
@@ -127,7 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
out:
per_cpu(trace_active, cpu)--;
/* prevent recursion in schedule */
ftrace_preempt_enable(resched);
preempt_enable_notrace();
}
static struct ftrace_ops trace_ops __read_mostly =

View File

@@ -23,6 +23,9 @@ static int syscall_exit_register(struct ftrace_event_call *event,
static int syscall_enter_define_fields(struct ftrace_event_call *call);
static int syscall_exit_define_fields(struct ftrace_event_call *call);
/* All syscall exit events have the same fields */
static LIST_HEAD(syscall_exit_fields);
static struct list_head *
syscall_get_enter_fields(struct ftrace_event_call *call)
{
@@ -34,9 +37,7 @@ syscall_get_enter_fields(struct ftrace_event_call *call)
static struct list_head *
syscall_get_exit_fields(struct ftrace_event_call *call)
{
struct syscall_metadata *entry = call->data;
return &entry->exit_fields;
return &syscall_exit_fields;
}
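
Every sys_exit record carries the same two payload fields, which is why one static list can now back all exit events instead of a per-syscall exit_fields copy. For reference, the shared shape (matching struct syscall_trace_exit as defined in the syscall tracing code):

struct syscall_trace_exit {
	struct trace_entry	ent;
	int			nr;	/* syscall number */
	long			ret;	/* return value */
};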
struct trace_event_functions enter_syscall_print_funcs = {

View File

@@ -1,329 +0,0 @@
/*
* trace stack traces
*
* Copyright (C) 2004-2008, Soeren Sandmann
* Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*/
#include <linux/kallsyms.h>
#include <linux/debugfs.h>
#include <linux/hrtimer.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/irq.h>
#include <linux/fs.h>
#include <asm/stacktrace.h>
#include "trace.h"
static struct trace_array *sysprof_trace;
static int __read_mostly tracer_enabled;
/*
* 1 msec sample interval by default:
*/
static unsigned long sample_period = 1000000;
static const unsigned int sample_max_depth = 512;
static DEFINE_MUTEX(sample_timer_lock);
/*
* Per CPU hrtimers that do the profiling:
*/
static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
struct stack_frame {
const void __user *next_fp;
unsigned long return_address;
};
static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
int ret;
if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
return 0;
ret = 1;
pagefault_disable();
if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
ret = 0;
pagefault_enable();
return ret;
}
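
copy_stack_frame() is the standard pattern for reading user memory from a context that must not sleep: range-check with access_ok(), disable page faults so a miss fails instead of blocking, and use the _inatomic copy variant. The same idiom on a hypothetical word-sized read (sketch; API names taken from the function above):

static int read_user_word(const void __user *addr, unsigned long *val)
{
	int ret = 1;

	if (!access_ok(VERIFY_READ, addr, sizeof(*val)))
		return 0;
	pagefault_disable();	/* faults now fail fast instead of sleeping */
	if (__copy_from_user_inatomic(val, addr, sizeof(*val)))
		ret = 0;
	pagefault_enable();
	return ret;		/* 1 on success, 0 on failure */
}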
struct backtrace_info {
struct trace_array_cpu *data;
struct trace_array *tr;
int pos;
};
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
/* Ignore warnings */
}
static void backtrace_warning(void *data, char *msg)
{
/* Ignore warnings */
}
static int backtrace_stack(void *data, char *name)
{
/* Don't bother with IRQ stacks for now */
return -1;
}
static void backtrace_address(void *data, unsigned long addr, int reliable)
{
struct backtrace_info *info = data;
if (info->pos < sample_max_depth && reliable) {
__trace_special(info->tr, info->data, 1, addr, 0);
info->pos++;
}
}
static const struct stacktrace_ops backtrace_ops = {
.warning = backtrace_warning,
.warning_symbol = backtrace_warning_symbol,
.stack = backtrace_stack,
.address = backtrace_address,
.walk_stack = print_context_stack,
};
static int
trace_kernel(struct pt_regs *regs, struct trace_array *tr,
struct trace_array_cpu *data)
{
struct backtrace_info info;
unsigned long bp;
char *stack;
info.tr = tr;
info.data = data;
info.pos = 1;
__trace_special(info.tr, info.data, 1, regs->ip, 0);
stack = ((char *)regs + sizeof(struct pt_regs));
#ifdef CONFIG_FRAME_POINTER
bp = regs->bp;
#else
bp = 0;
#endif
dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
return info.pos;
}
static void timer_notify(struct pt_regs *regs, int cpu)
{
struct trace_array_cpu *data;
struct stack_frame frame;
struct trace_array *tr;
const void __user *fp;
int is_user;
int i;
if (!regs)
return;
tr = sysprof_trace;
data = tr->data[cpu];
is_user = user_mode(regs);
if (!current || current->pid == 0)
return;
if (is_user && current->state != TASK_RUNNING)
return;
__trace_special(tr, data, 0, 0, current->pid);
if (!is_user)
i = trace_kernel(regs, tr, data);
else
i = 0;
/*
* Trace user stack if we are not a kernel thread
*/
if (current->mm && i < sample_max_depth) {
regs = (struct pt_regs *)current->thread.sp0 - 1;
fp = (void __user *)regs->bp;
__trace_special(tr, data, 2, regs->ip, 0);
while (i < sample_max_depth) {
frame.next_fp = NULL;
frame.return_address = 0;
if (!copy_stack_frame(fp, &frame))
break;
if ((unsigned long)fp < regs->sp)
break;
__trace_special(tr, data, 2, frame.return_address,
(unsigned long)fp);
fp = frame.next_fp;
i++;
}
}
/*
* Special trace entry if we overflow the max depth:
*/
if (i == sample_max_depth)
__trace_special(tr, data, -1, -1, -1);
__trace_special(tr, data, 3, current->pid, i);
}
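
The user-stack loop in timer_notify() is a plain frame-pointer walk: each saved frame pointer links to the caller's frame, the return address sits alongside it, and the walk stops once the chain leaves the stack or a copy faults. The same technique as a self-contained userspace sketch (assumptions: x86-style frame layout, built with -fno-omit-frame-pointer):

#include <stdio.h>

struct frame {
	struct frame *next_fp;	/* caller's saved frame pointer */
	void *return_address;	/* stored next to it on x86 */
};

static void backtrace_fp(void)
{
	struct frame *fp = __builtin_frame_address(0);
	int depth = 0;

	while (fp && depth < 16) {
		printf("#%d  %p\n", depth, fp->return_address);
		if (fp->next_fp <= fp)	/* stack grows down: chain must rise */
			break;
		fp = fp->next_fp;
		depth++;
	}
}

int main(void)
{
	backtrace_fp();
	return 0;
}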
static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
{
/* trace here */
timer_notify(get_irq_regs(), smp_processor_id());
hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
return HRTIMER_RESTART;
}
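
stack_trace_timer_fn() shows the self-rearming hrtimer idiom: hrtimer_forward_now() pushes the expiry forward by one period relative to the current time, and returning HRTIMER_RESTART keeps the timer armed from within its own callback. Minimal form (sketch with a hypothetical 1 ms period):

static enum hrtimer_restart my_tick(struct hrtimer *t)
{
	/* ... take one sample ... */
	hrtimer_forward_now(t, ns_to_ktime(1000000));	/* re-arm +1 ms */
	return HRTIMER_RESTART;
}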
static void start_stack_timer(void *unused)
{
struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = stack_trace_timer_fn;
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED);
}
static void start_stack_timers(void)
{
on_each_cpu(start_stack_timer, NULL, 1);
}
static void stop_stack_timer(int cpu)
{
struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
hrtimer_cancel(hrtimer);
}
static void stop_stack_timers(void)
{
int cpu;
for_each_online_cpu(cpu)
stop_stack_timer(cpu);
}
static void stop_stack_trace(struct trace_array *tr)
{
mutex_lock(&sample_timer_lock);
stop_stack_timers();
tracer_enabled = 0;
mutex_unlock(&sample_timer_lock);
}
static int stack_trace_init(struct trace_array *tr)
{
sysprof_trace = tr;
tracing_start_cmdline_record();
mutex_lock(&sample_timer_lock);
start_stack_timers();
tracer_enabled = 1;
mutex_unlock(&sample_timer_lock);
return 0;
}
static void stack_trace_reset(struct trace_array *tr)
{
tracing_stop_cmdline_record();
stop_stack_trace(tr);
}
static struct tracer stack_trace __read_mostly =
{
.name = "sysprof",
.init = stack_trace_init,
.reset = stack_trace_reset,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_sysprof,
#endif
};
__init static int init_stack_trace(void)
{
return register_tracer(&stack_trace);
}
device_initcall(init_stack_trace);
#define MAX_LONG_DIGITS 22
static ssize_t
sysprof_sample_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[MAX_LONG_DIGITS];
int r;
r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
static ssize_t
sysprof_sample_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[MAX_LONG_DIGITS];
unsigned long val;
if (cnt > MAX_LONG_DIGITS-1)
cnt = MAX_LONG_DIGITS-1;
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
buf[cnt] = 0;
val = simple_strtoul(buf, NULL, 10);
/*
* Enforce a minimum sample period of 100 usecs:
*/
if (val < 100)
val = 100;
mutex_lock(&sample_timer_lock);
stop_stack_timers();
sample_period = val * 1000;
start_stack_timers();
mutex_unlock(&sample_timer_lock);
return cnt;
}
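
sysprof_sample_write() is the usual debugfs write idiom: bound the copy to the local buffer, NUL-terminate (user data is not), parse, clamp, and only then apply the value under the lock. The parse-and-clamp core as a self-contained sketch:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse a decimal microsecond count, enforcing the 100 us floor used
 * above; returns the period in nanoseconds. */
static unsigned long parse_sample_period(const char *ubuf, size_t cnt)
{
	char buf[22];			/* MAX_LONG_DIGITS */
	unsigned long val;

	if (cnt > sizeof(buf) - 1)
		cnt = sizeof(buf) - 1;
	memcpy(buf, ubuf, cnt);
	buf[cnt] = '\0';
	val = strtoul(buf, NULL, 10);
	if (val < 100)			/* minimum sample period: 100 us */
		val = 100;
	return val * 1000;		/* usecs -> nsecs */
}

int main(void)
{
	printf("%lu\n", parse_sample_period("250\n", 4));	/* 250000 */
	return 0;
}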
static const struct file_operations sysprof_sample_fops = {
.read = sysprof_sample_read,
.write = sysprof_sample_write,
};
void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
{
trace_create_file("sysprof_sample_period", 0644,
d_tracer, NULL, &sysprof_sample_fops);
}

567
kernel/watchdog.c Normal file
View File

@@ -0,0 +1,567 @@
/*
* Detect hard and soft lockups on a system
*
* started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
*
* this code detects hard lockups: incidents where a CPU
* stops responding to anything except NMI.
*
* Note: Most of this code is borrowed heavily from softlockup.c,
* so thanks to Ingo for the initial implementation.
* Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
* to those contributors as well.
*/
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <asm/irq_regs.h>
#include <linux/perf_event.h>
int watchdog_enabled;
int __read_mostly softlockup_thresh = 60;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
static int __read_mostly did_panic;
static int __initdata no_watchdog;
/* boot commands */
/*
* Should we panic when a soft-lockup or hard-lockup occurs:
*/
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static int hardlockup_panic;
static int __init hardlockup_panic_setup(char *str)
{
if (!strncmp(str, "panic", 5))
hardlockup_panic = 1;
return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
#endif
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
static int __init softlockup_panic_setup(char *str)
{
softlockup_panic = simple_strtoul(str, NULL, 0);
return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);
static int __init nowatchdog_setup(char *str)
{
no_watchdog = 1;
return 1;
}
__setup("nowatchdog", nowatchdog_setup);
/* deprecated */
static int __init nosoftlockup_setup(char *str)
{
no_watchdog = 1;
return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
/* */
/*
* Returns seconds, approximately. We don't need nanosecond
* resolution, and we don't need to waste time with a big divide when
* 2^30ns == 1.074s.
*/
static unsigned long get_timestamp(int this_cpu)
{
return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
}
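
The right-shift by 30 divides the nanosecond clock by 2^30 ≈ 1.074e9 instead of 10^9, trading roughly 7% accuracy for avoiding a 64-bit divide. A self-contained check of the approximation:

#include <stdio.h>

int main(void)
{
	unsigned long long ns = 60ULL * 1000000000ULL;	/* 60 s in ns */

	/* 2^30 = 1073741824 ~= 1e9, so >>30 is roughly a divide by 1e9 */
	printf("%llu\n", ns >> 30);	/* prints 55: about 7% below 60 */
	return 0;
}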
static unsigned long get_sample_period(void)
{
/*
* convert softlockup_thresh from seconds to ns
* the division by 5 gives the hrtimer 5 chances to
* increment the interrupt count before the hardlockup
* detector generates a warning
*/
return softlockup_thresh / 5 * NSEC_PER_SEC;
}
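
Worked through with the defaults: softlockup_thresh is 60, so the period is 60 / 5 * NSEC_PER_SEC = 12e9 ns, meaning the hrtimer fires five times per threshold window; that is what gives the hardlockup side multiple interrupt increments between NMI samples. (One caveat worth noting: 12e9 exceeds a 32-bit unsigned long, so the expression only behaves on 64-bit builds.)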
/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
int this_cpu = smp_processor_id();
__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
}
void touch_softlockup_watchdog(void)
{
__get_cpu_var(watchdog_touch_ts) = 0;
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
void touch_all_softlockup_watchdogs(void)
{
int cpu;
/*
* this is done locklessly
* do we care if a 0 races with a fresh timestamp?
* all it means is the softlockup check starts one cycle later
*/
for_each_online_cpu(cpu)
per_cpu(watchdog_touch_ts, cpu) = 0;
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
void touch_nmi_watchdog(void)
{
__get_cpu_var(watchdog_nmi_touch) = true;
touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
#endif
void touch_softlockup_watchdog_sync(void)
{
__raw_get_cpu_var(softlockup_touch_sync) = true;
__raw_get_cpu_var(watchdog_touch_ts) = 0;
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
/* watchdog detector functions */
static int is_hardlockup(void)
{
unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
return 1;
__get_cpu_var(hrtimer_interrupts_saved) = hrint;
return 0;
}
#endif
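
The hardlockup test pairs a fast sampler with a slow counter: the perf NMI fires while watchdog_timer_fn() should have incremented hrtimer_interrupts several times in between, so if the count has not moved since the last NMI, interrupts on that CPU have stalled. The logic in a self-contained sketch:

#include <stdio.h>

static unsigned long hrtimer_interrupts, saved;

/* Called from the fast (NMI) path; detects a stalled slow (timer) path. */
static int check_hardlockup(void)
{
	if (saved == hrtimer_interrupts)
		return 1;		/* no timer progress since last sample */
	saved = hrtimer_interrupts;
	return 0;
}

int main(void)
{
	hrtimer_interrupts++;			/* timer ticked */
	printf("%d\n", check_hardlockup());	/* 0: progress observed */
	printf("%d\n", check_hardlockup());	/* 1: counter stalled */
	return 0;
}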
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp(smp_processor_id());
/* Warn about unreasonable delays: */
if (time_after(now, touch_ts + softlockup_thresh))
return now - touch_ts;
return 0;
}
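
Worked example of the softlockup side: with touch_ts = 100 s, softlockup_thresh = 60 and a current timestamp of 161 s, time_after(161, 160) is true and the function reports a 61-second stall; at 159 s it still returns 0.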
static int
watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
did_panic = 1;
return NOTIFY_DONE;
}
static struct notifier_block panic_block = {
.notifier_call = watchdog_panic,
};
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.size = sizeof(struct perf_event_attr),
.pinned = 1,
.disabled = 1,
};
/* Callback function for perf event subsystem */
void watchdog_overflow_callback(struct perf_event *event, int nmi,
struct perf_sample_data *data,
struct pt_regs *regs)
{
if (__get_cpu_var(watchdog_nmi_touch) == true) {
__get_cpu_var(watchdog_nmi_touch) = false;
return;
}
/* check for a hardlockup
* This is done by making sure our timer interrupt
* is incrementing. The timer interrupt should have
* fired multiple times before we overflowed. If it hasn't,
* then this is a good indication the CPU is stuck
*/
if (is_hardlockup()) {
int this_cpu = smp_processor_id();
/* only print hardlockups once */
if (__get_cpu_var(hard_watchdog_warn) == true)
return;
if (hardlockup_panic)
panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
else
WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
__get_cpu_var(hard_watchdog_warn) = true;
return;
}
__get_cpu_var(hard_watchdog_warn) = false;
return;
}
static void watchdog_interrupt_count(void)
{
__get_cpu_var(hrtimer_interrupts)++;
}
#else
static inline void watchdog_interrupt_count(void) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
struct pt_regs *regs = get_irq_regs();
int duration;
/* kick the hardlockup detector */
watchdog_interrupt_count();
/* kick the softlockup detector */
wake_up_process(__get_cpu_var(softlockup_watchdog));
/* .. and repeat */
hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
if (touch_ts == 0) {
if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
/*
* If the time stamp was touched atomically
* make sure the scheduler tick is up to date.
*/
__get_cpu_var(softlockup_touch_sync) = false;
sched_clock_tick();
}
__touch_watchdog();
return HRTIMER_RESTART;
}
/* check for a softlockup
* This is done by making sure a high-priority task is
* being scheduled. That task touches the watchdog to
* indicate it is getting CPU time. If it hasn't, then
* this is a good indication some task is hogging the CPU
*/
duration = is_softlockup(touch_ts);
if (unlikely(duration)) {
/* only warn once */
if (__get_cpu_var(soft_watchdog_warn) == true)
return HRTIMER_RESTART;
printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
print_modules();
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
if (softlockup_panic)
panic("softlockup: hung tasks");
__get_cpu_var(soft_watchdog_warn) = true;
} else
__get_cpu_var(soft_watchdog_warn) = false;
return HRTIMER_RESTART;
}
/*
* The watchdog thread - touches the timestamp.
*/
static int watchdog(void *unused)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
sched_setscheduler(current, SCHED_FIFO, &param);
/* initialize timestamp */
__touch_watchdog();
/* kick off the timer for the hardlockup detector */
/* done here because hrtimer_start can only pin to smp_processor_id() */
hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
HRTIMER_MODE_REL_PINNED);
set_current_state(TASK_INTERRUPTIBLE);
/*
* Run briefly once per second to reset the softlockup timestamp.
* If this gets delayed for more than 60 seconds then the
* debug-printout triggers in watchdog_timer_fn().
*/
while (!kthread_should_stop()) {
__touch_watchdog();
schedule();
if (kthread_should_stop())
break;
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return 0;
}
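
The division of duties: the per-cpu hrtimer wakes this SCHED_FIFO kthread, and only an actually-running kthread refreshes watchdog_touch_ts, so a starved scheduler shows up as an aging timestamp that the timer callback then reports. A rough userspace analogue of that handshake (sketch; pthreads and wall-clock time stand in for the kthread and hrtimer):

#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static volatile time_t touch_ts;	/* refreshed only when the thread runs */

static void *watchdog_thread(void *unused)
{
	for (;;) {
		touch_ts = time(NULL);	/* proof the scheduler ran us */
		sleep(1);
	}
	return NULL;
}

int main(void)
{
	pthread_t tid;

	touch_ts = time(NULL);
	pthread_create(&tid, NULL, watchdog_thread, NULL);
	for (int i = 0; i < 3; i++) {	/* stands in for the hrtimer side */
		sleep(2);
		if (time(NULL) - touch_ts > 10)
			printf("BUG: soft lockup suspected\n");
		else
			printf("watchdog thread is being scheduled\n");
	}
	return 0;
}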
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static int watchdog_nmi_enable(int cpu)
{
struct perf_event_attr *wd_attr;
struct perf_event *event = per_cpu(watchdog_ev, cpu);
/* is it already setup and enabled? */
if (event && event->state > PERF_EVENT_STATE_OFF)
goto out;
/* it is setup but not enabled */
if (event != NULL)
goto out_enable;
/* Try to register using hardware perf events */
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period();
event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback);
if (!IS_ERR(event)) {
printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
goto out_save;
}
printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
return -1;
/* success path */
out_save:
per_cpu(watchdog_ev, cpu) = event;
out_enable:
perf_event_enable(per_cpu(watchdog_ev, cpu));
out:
return 0;
}
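
perf_event_create_kernel_counter() returns an ERR_PTR-encoded errno rather than NULL on failure, hence the IS_ERR() test above. The call-site idiom in isolation (sketch; attr, cpu and my_overflow_handler are placeholders):

struct perf_event *ev;

ev = perf_event_create_kernel_counter(attr, cpu, -1 /* all tasks */,
				      my_overflow_handler);
if (IS_ERR(ev))
	return PTR_ERR(ev);	/* encoded -errno, not a usable pointer */
perf_event_enable(ev);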
static void watchdog_nmi_disable(int cpu)
{
struct perf_event *event = per_cpu(watchdog_ev, cpu);
if (event) {
perf_event_disable(event);
per_cpu(watchdog_ev, cpu) = NULL;
/* should be in cleanup, but blocks oprofile */
perf_event_release_kernel(event);
}
return;
}
#else
static int watchdog_nmi_enable(int cpu) { return 0; }
static void watchdog_nmi_disable(int cpu) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
/* prepare/enable/disable routines */
static int watchdog_prepare_cpu(int cpu)
{
struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
WARN_ON(per_cpu(softlockup_watchdog, cpu));
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
return 0;
}
static int watchdog_enable(int cpu)
{
struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
/* enable the perf event */
if (watchdog_nmi_enable(cpu) != 0)
return -1;
/* create the watchdog thread */
if (!p) {
p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
if (IS_ERR(p)) {
printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
return -1;
}
kthread_bind(p, cpu);
per_cpu(watchdog_touch_ts, cpu) = 0;
per_cpu(softlockup_watchdog, cpu) = p;
wake_up_process(p);
}
return 0;
}
static void watchdog_disable(int cpu)
{
struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
/*
* cancel the timer first to stop incrementing the stats
* and waking up the kthread
*/
hrtimer_cancel(hrtimer);
/* disable the perf event */
watchdog_nmi_disable(cpu);
/* stop the watchdog thread */
if (p) {
per_cpu(softlockup_watchdog, cpu) = NULL;
kthread_stop(p);
}
/* if any cpu succeeds, watchdog is considered enabled for the system */
watchdog_enabled = 1;
}
static void watchdog_enable_all_cpus(void)
{
int cpu;
int result = 0;
for_each_online_cpu(cpu)
result += watchdog_enable(cpu);
if (result)
printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
}
static void watchdog_disable_all_cpus(void)
{
int cpu;
for_each_online_cpu(cpu)
watchdog_disable(cpu);
/* if all watchdogs are disabled, then they are disabled for the system */
watchdog_enabled = 0;
}
/* sysctl functions */
#ifdef CONFIG_SYSCTL
/*
* proc handler for /proc/sys/kernel/nmi_watchdog
*/
int proc_dowatchdog_enabled(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec(table, write, buffer, length, ppos);
if (watchdog_enabled)
watchdog_enable_all_cpus();
else
watchdog_disable_all_cpus();
return 0;
}
int proc_dowatchdog_thresh(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
#endif /* CONFIG_SYSCTL */
/*
* Create/destroy watchdog threads as CPUs come and go:
*/
static int __cpuinit
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
int hotcpu = (unsigned long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
if (watchdog_prepare_cpu(hotcpu))
return NOTIFY_BAD;
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
if (watchdog_enable(hotcpu))
return NOTIFY_BAD;
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
watchdog_disable(hotcpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
watchdog_disable(hotcpu);
break;
#endif /* CONFIG_HOTPLUG_CPU */
}
return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata cpu_nfb = {
.notifier_call = cpu_callback
};
static int __init spawn_watchdog_task(void)
{
void *cpu = (void *)(long)smp_processor_id();
int err;
if (no_watchdog)
return 0;
err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
WARN_ON(err == NOTIFY_BAD);
cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
register_cpu_notifier(&cpu_nfb);
atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
return 0;
}
early_initcall(spawn_watchdog_task);