Merge branch 'perf/core' into perf/urgent

Merge reason: Switch from pre-merge topical split to the post-merge urgent track

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar
2010-03-04 11:47:50 +01:00
599 changed files with 13756 additions and 5933 deletions

View File

@@ -530,8 +530,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
return -EINVAL;
WARN_ON(!atomic_read(&pi_state->refcount));
WARN_ON(pid && pi_state->owner &&
pi_state->owner->pid != pid);
/*
* When pi_state->owner is NULL then the owner died
* and another waiter is on the fly. pi_state->owner
* is fixed up by the task which acquires
* pi_state->rt_mutex.
*
* We do not check for pid == 0 which can happen when
* the owner died and robust_list_exit() cleared the
* TID.
*/
if (pid && pi_state->owner) {
/*
* Bail out if user space manipulated the
* futex value.
*/
if (pid != task_pid_vnr(pi_state->owner))
return -EINVAL;
}
atomic_inc(&pi_state->refcount);
*ps = pi_state;
@@ -758,6 +775,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
if (!pi_state)
return -EINVAL;
/*
* If current does not own the pi_state then the futex is
* inconsistent and user space fiddled with the futex value.
*/
if (pi_state->owner != current)
return -EINVAL;
raw_spin_lock(&pi_state->pi_mutex.wait_lock);
new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
@@ -1971,7 +1995,7 @@ retry_private:
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
goto out;
goto out_put_key;
out_unlock_put_key:
queue_unlock(&q, hb);

View File

@@ -413,17 +413,17 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
*
* @return a set of per_cpu pointers to perf events
*/
struct perf_event **
struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
perf_overflow_handler_t triggered)
{
struct perf_event **cpu_events, **pevent, *bp;
struct perf_event * __percpu *cpu_events, **pevent, *bp;
long err;
int cpu;
cpu_events = alloc_percpu(typeof(*cpu_events));
if (!cpu_events)
return ERR_PTR(-ENOMEM);
return (void __percpu __force *)ERR_PTR(-ENOMEM);
get_online_cpus();
for_each_online_cpu(cpu) {
@@ -451,7 +451,7 @@ fail:
put_online_cpus();
free_percpu(cpu_events);
return ERR_PTR(err);
return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
@@ -459,7 +459,7 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
* unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
* @cpu_events: the per cpu set of events to unregister
*/
void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
int cpu;
struct perf_event **pevent;

View File

@@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
buffer = kmalloc(size, gfp_mask);
if (!buffer) {
_kfifo_init(fifo, 0, 0);
_kfifo_init(fifo, NULL, 0);
return -ENOMEM;
}
@@ -97,6 +97,7 @@ EXPORT_SYMBOL(kfifo_alloc);
/*
* kfifo_free - release the buffer held by a fifo
* @fifo: the fifo whose buffer is released
*
* Hands fifo->buffer to kfree() and then re-initialises the fifo through
* _kfifo_init() with a NULL buffer and a size of 0.
*/
void kfifo_free(struct kfifo *fifo)
{
kfree(fifo->buffer);
/* presumably so that the fifo no longer points at the freed memory */
_kfifo_init(fifo, NULL, 0);
}
EXPORT_SYMBOL(kfifo_free);

View File

@@ -599,7 +599,7 @@ static void kgdb_wait(struct pt_regs *regs)
/* Signal the primary CPU that we are done: */
atomic_set(&cpu_in_kgdb[cpu], 0);
touch_softlockup_watchdog();
touch_softlockup_watchdog_sync();
clocksource_touch_watchdog();
local_irq_restore(flags);
}
@@ -1453,7 +1453,7 @@ acquirelock:
(kgdb_info[cpu].task &&
kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
atomic_set(&kgdb_active, -1);
touch_softlockup_watchdog();
touch_softlockup_watchdog_sync();
clocksource_touch_watchdog();
local_irq_restore(flags);
@@ -1553,7 +1553,7 @@ kgdb_restore:
}
/* Free kgdb_active */
atomic_set(&kgdb_active, -1);
touch_softlockup_watchdog();
touch_softlockup_watchdog_sync();
clocksource_touch_watchdog();
local_irq_restore(flags);

View File

@@ -44,6 +44,7 @@
#include <linux/debugfs.h>
#include <linux/kdebug.h>
#include <linux/memory.h>
#include <linux/ftrace.h>
#include <asm-generic/sections.h>
#include <asm/cacheflush.h>
@@ -93,6 +94,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = {
{"native_get_debugreg",},
{"irq_entries_start",},
{"common_interrupt",},
{"mcount",}, /* mcount can be called from everywhere */
{NULL} /* Terminator */
};
@@ -124,30 +126,6 @@ static LIST_HEAD(kprobe_insn_pages);
static int kprobe_garbage_slots;
static int collect_garbage_slots(void);
/*
* check_safety - check that no task other than current is running
*
* When both CONFIG_PREEMPT and CONFIG_FREEZER are defined, this calls
* freeze_processes() and, if that returned 0, walks every thread looking
* for one that is not current, has a non-zero pid and is in the
* TASK_RUNNING state. thaw_processes() is called afterwards whether or
* not the freeze or the scan succeeded. In every other configuration the
* function only calls synchronize_sched().
*
* Returns 0 on success, the non-zero result of freeze_processes() if
* freezing failed, or -1 if a running task was found.
*/
static int __kprobes check_safety(void)
{
int ret = 0;
#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER)
ret = freeze_processes();
if (ret == 0) {
struct task_struct *p, *q;
do_each_thread(p, q) {
if (p != current && p->state == TASK_RUNNING &&
p->pid != 0) {
printk("Check failed: %s is running\n",p->comm);
ret = -1;
/* one offender is enough: stop scanning */
goto loop_end;
}
} while_each_thread(p, q);
}
/* reached on every path of this variant, including a failed freeze */
loop_end:
thaw_processes();
#else
synchronize_sched();
#endif
return ret;
}
/**
* __get_insn_slot() - Find a slot on an executable page for an instruction.
* We allocate an executable page if there's no room on existing ones.
@@ -235,9 +213,8 @@ static int __kprobes collect_garbage_slots(void)
{
struct kprobe_insn_page *kip, *next;
/* Ensure no-one is preempted on the garbages */
if (check_safety())
return -EAGAIN;
/* Ensure no-one is interrupted on the garbages */
synchronize_sched();
list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) {
int i;
@@ -728,7 +705,8 @@ int __kprobes register_kprobe(struct kprobe *p)
preempt_disable();
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr)) {
in_kprobes_functions((unsigned long) p->addr) ||
ftrace_text_reserved(p->addr, p->addr)) {
preempt_enable();
return -EINVAL;
}

File diff suppressed because it is too large Load Diff

View File

@@ -2794,7 +2794,13 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
*/
prev_state = prev->state;
finish_arch_switch(prev);
perf_event_task_sched_in(current, cpu_of(rq));
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_disable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
perf_event_task_sched_in(current);
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
fire_sched_in_preempt_notifiers(current);
@@ -5309,7 +5315,7 @@ void scheduler_tick(void)
curr->sched_class->task_tick(rq, curr, 0);
raw_spin_unlock(&rq->lock);
perf_event_task_tick(curr, cpu);
perf_event_task_tick(curr);
#ifdef CONFIG_SMP
rq->idle_at_tick = idle_cpu(cpu);
@@ -5523,7 +5529,7 @@ need_resched_nonpreemptible:
if (likely(prev != next)) {
sched_info_switch(prev, next);
perf_event_task_sched_out(prev, next, cpu);
perf_event_task_sched_out(prev, next);
rq->nr_switches++;
rq->curr = next;

View File

@@ -500,22 +500,17 @@ EXPORT_SYMBOL(tasklet_kill);
*/
/*
* The trampoline is called when the hrtimer expires. If this is
* called from the hrtimer interrupt then we schedule the tasklet as
* the timer callback function expects to run in softirq context. If
* it's called in softirq context anyway (i.e. high resolution timers
* disabled) then the hrtimer callback is called right away.
* The trampoline is called when the hrtimer expires. It schedules a tasklet
* to run __tasklet_hrtimer_trampoline() which in turn will call the intended
* hrtimer callback, but from softirq context.
*/
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
struct tasklet_hrtimer *ttimer =
container_of(timer, struct tasklet_hrtimer, timer);
if (hrtimer_is_hres_active(timer)) {
tasklet_hi_schedule(&ttimer->tasklet);
return HRTIMER_NORESTART;
}
return ttimer->function(timer);
tasklet_hi_schedule(&ttimer->tasklet);
return HRTIMER_NORESTART;
}
/*

View File

@@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock);
static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(bool, softlock_touch_sync);
static int __read_mostly did_panic;
int __read_mostly softlockup_thresh = 60;
@@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void)
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
/*
* touch_softlockup_watchdog_sync - touch the watchdog and request a clock sync
*
* Sets the softlock_touch_sync flag and zeroes softlockup_touch_ts through
* __raw_get_cpu_var(). When softlockup_tick() later finds a zero timestamp
* with the flag set, it clears the flag and calls sched_clock_tick() before
* taking a fresh timestamp.
*/
void touch_softlockup_watchdog_sync(void)
{
/* raise the flag first; the zeroed timestamp is what the tick checks */
__raw_get_cpu_var(softlock_touch_sync) = true;
__raw_get_cpu_var(softlockup_touch_ts) = 0;
}
void touch_all_softlockup_watchdogs(void)
{
int cpu;
@@ -118,6 +125,14 @@ void softlockup_tick(void)
}
if (touch_ts == 0) {
if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
/*
* If the time stamp was touched atomically
* make sure the scheduler tick is up to date.
*/
per_cpu(softlock_touch_sync, this_cpu) = false;
sched_clock_tick();
}
__touch_softlockup_watchdog();
return;
}

View File

@@ -222,6 +222,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
if (which > PRIO_USER || which < PRIO_PROCESS)
return -EINVAL;
rcu_read_lock();
read_lock(&tasklist_lock);
switch (which) {
case PRIO_PROCESS:
@@ -267,6 +268,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
}
out_unlock:
read_unlock(&tasklist_lock);
rcu_read_unlock();
return retval;
}

View File

@@ -880,6 +880,7 @@ void getboottime(struct timespec *ts)
set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
}
EXPORT_SYMBOL_GPL(getboottime);
/**
* monotonic_to_bootbased - Convert the monotonic time to boot based.
@@ -889,6 +890,7 @@ void monotonic_to_bootbased(struct timespec *ts)
{
*ts = timespec_add_safe(*ts, total_sleep_time);
}
EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
unsigned long get_seconds(void)
{

View File

@@ -51,7 +51,9 @@ endif
obj-$(CONFIG_EVENT_TRACING) += trace_events.o
obj-$(CONFIG_EVENT_TRACING) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o

View File

@@ -22,7 +22,6 @@
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/ctype.h>
@@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records;
} \
}
/*
* Helpers that track the FTRACE_FL_FROZEN flag on dyn_ftrace records.
* With CONFIG_KPROBES they are real functions that also maintain
* frozen_record_count; without it they are macros that evaluate to 0.
*/
#ifdef CONFIG_KPROBES
/* number of records that currently have FTRACE_FL_FROZEN set */
static int frozen_record_count;
/* Set FTRACE_FL_FROZEN on @rec; the counter only moves on a 0 -> 1 change. */
static inline void freeze_record(struct dyn_ftrace *rec)
{
if (!(rec->flags & FTRACE_FL_FROZEN)) {
rec->flags |= FTRACE_FL_FROZEN;
frozen_record_count++;
}
}
/* Clear FTRACE_FL_FROZEN on @rec; the counter only moves on a 1 -> 0 change. */
static inline void unfreeze_record(struct dyn_ftrace *rec)
{
if (rec->flags & FTRACE_FL_FROZEN) {
rec->flags &= ~FTRACE_FL_FROZEN;
frozen_record_count--;
}
}
/* Non-zero when @rec has FTRACE_FL_FROZEN set (returns the masked flag bits). */
static inline int record_frozen(struct dyn_ftrace *rec)
{
return rec->flags & FTRACE_FL_FROZEN;
}
#else
/* stubs: nothing is ever frozen when kprobes is not built in */
# define freeze_record(rec) ({ 0; })
# define unfreeze_record(rec) ({ 0; })
# define record_frozen(rec) ({ 0; })
#endif /* CONFIG_KPROBES */
static void ftrace_free_rec(struct dyn_ftrace *rec)
{
rec->freelist = ftrace_free_records;
@@ -1025,6 +994,21 @@ static void ftrace_bug(int failed, unsigned long ip)
}
/*
* ftrace_text_reserved - test whether an address range overlaps an ftrace site
* @start: first address of the range
* @end: last address of the range (inclusive)
*
* Walks every dyn_ftrace record and compares [start, end] against the
* MCOUNT_INSN_SIZE bytes that begin at rec->ip.
*
* Returns 1 if any record overlaps the range, 0 otherwise.
*/
int ftrace_text_reserved(void *start, void *end)
{
	const unsigned long range_first = (unsigned long)start;
	const unsigned long range_last = (unsigned long)end;
	struct ftrace_page *pg;
	struct dyn_ftrace *rec;

	do_for_each_ftrace_rec(pg, rec) {
		/* the site must end after the range starts and start by its end */
		if (rec->ip + MCOUNT_INSN_SIZE > range_first &&
		    rec->ip <= range_last)
			return 1;
	} while_for_each_ftrace_rec();

	return 0;
}
static int
__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
{
@@ -1076,14 +1060,6 @@ static void ftrace_replace_code(int enable)
!(rec->flags & FTRACE_FL_CONVERTED))
continue;
/* ignore updates to this record's mcount site */
if (get_kprobe((void *)rec->ip)) {
freeze_record(rec);
continue;
} else {
unfreeze_record(rec);
}
failed = __ftrace_replace_code(rec, enable);
if (failed) {
rec->flags |= FTRACE_FL_FAILED;

View File

@@ -6,14 +6,12 @@
*/
#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"
char *perf_trace_buf;
EXPORT_SYMBOL_GPL(perf_trace_buf);
char *perf_trace_buf_nmi;
EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
static char *perf_trace_buf;
static char *perf_trace_buf_nmi;
typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id)
}
mutex_unlock(&event_mutex);
}
/*
* ftrace_perf_buf_prepare - set up a per-cpu perf trace entry
* @size: size in bytes of the entry; the final sizeof(u64) bytes of this
*        span are zeroed
* @type: value stored in the type field of the entry header
* @rctxp: out: result of perf_swevent_get_recursion_context()
* @irq_flags: out: interrupt state saved by local_irq_save()
*
* Disables interrupts, takes a perf software-event recursion context and
* picks the current cpu's slot of perf_trace_buf_nmi (when in_nmi()) or
* perf_trace_buf (otherwise). The trace_entry header at the start of the
* slot is filled in through tracing_generic_entry_update() and @type.
*
* Returns the slot on success; interrupts are then still disabled and the
* recursion context is still held. NOTE(review): callers in this file pass
* @rctxp and @irq_flags on to ftrace_perf_buf_submit(), which presumably
* undoes both - confirm against its definition.
*
* Returns NULL if the recursion context is negative or the selected buffer
* pointer is NULL; interrupts have been restored in both cases, and the
* recursion context has been put back in the latter.
*/
__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
int *rctxp, unsigned long *irq_flags)
{
struct trace_entry *entry;
char *trace_buf, *raw_data;
int pc, cpu;
/* sampled before interrupts are disabled below */
pc = preempt_count();
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(*irq_flags);
*rctxp = perf_swevent_get_recursion_context();
if (*rctxp < 0)
goto err_recursion;
cpu = smp_processor_id();
/* NMI context uses its own buffer, separate from the regular one */
if (in_nmi())
trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto err;
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
entry = (struct trace_entry *)raw_data;
tracing_generic_entry_update(entry, *irq_flags, pc);
entry->type = type;
return raw_data;
/* unwind in reverse order of acquisition */
err:
perf_swevent_put_recursion_context(*rctxp);
err_recursion:
local_irq_restore(*irq_flags);
return NULL;
}
EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);

View File

@@ -1371,7 +1371,7 @@ out_unlock:
return err;
}
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
void ftrace_profile_free_filter(struct perf_event *event)
{
@@ -1439,5 +1439,5 @@ out_unlock:
return err;
}
#endif /* CONFIG_EVENT_PROFILE */
#endif /* CONFIG_PERF_EVENTS */

View File

@@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
return retval;
}
static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
{
return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
}
static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
void *dummy)
{
@@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
{
int ret = -EINVAL;
if (ff->func == fetch_argument)
ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
else if (ff->func == fetch_register) {
if (ff->func == fetch_register) {
const char *name;
name = regs_query_register_name((unsigned int)((long)ff->data));
ret = snprintf(buf, n, "%%%s", name);
@@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
}
} else
ret = -EINVAL;
} else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
ret = strict_strtoul(arg + 3, 10, &param);
if (ret || param > PARAM_MAX_ARGS)
ret = -EINVAL;
else {
ff->func = fetch_argument;
ff->data = (void *)param;
}
} else
ret = -EINVAL;
return ret;
@@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv)
* - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
* - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
* Fetch args:
* $argN : fetch Nth of function argument. (N:0-)
* $retval : fetch return value
* $stack : fetch stack address
* $stackN : fetch Nth of stack (N:0-)
@@ -689,7 +673,7 @@ static int create_trace_probe(int argc, char **argv)
return -EINVAL;
}
/* an address specified */
ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
if (ret) {
pr_info("Failed to parse address.\n");
return ret;
@@ -958,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = {
};
/* Kprobe handler */
static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct kprobe_trace_entry *entry;
@@ -978,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
irq_flags, pc);
if (!event)
return 0;
return;
entry = ring_buffer_event_data(event);
entry->nargs = tp->nr_args;
@@ -988,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
return 0;
}
/* Kretprobe handler */
static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1011,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
irq_flags, pc);
if (!event)
return 0;
return;
entry = ring_buffer_event_data(event);
entry->nargs = tp->nr_args;
@@ -1022,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
return 0;
}
/* Event entry printers */
@@ -1250,137 +1231,67 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
", REC->" FIELD_STRING_RETIP);
}
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */
static __kprobes int kprobe_profile_func(struct kprobe *kp,
static __kprobes void kprobe_profile_func(struct kprobe *kp,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct ftrace_event_call *call = &tp->call;
struct kprobe_trace_entry *entry;
struct trace_entry *ent;
int size, __size, i, pc, __cpu;
int size, __size, i;
unsigned long irq_flags;
char *trace_buf;
char *raw_data;
int rctx;
pc = preempt_count();
__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
"profile buffer not large enough"))
return 0;
return;
/*
* Protect the non nmi buffer
* This also protects the rcu read side
*/
local_irq_save(irq_flags);
entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
if (!entry)
return;
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
__cpu = smp_processor_id();
if (in_nmi())
trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, __cpu);
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
entry = (struct kprobe_trace_entry *)raw_data;
ent = &entry->ent;
tracing_generic_entry_update(ent, irq_flags, pc);
ent->type = call->id;
entry->nargs = tp->nr_args;
entry->ip = (unsigned long)kp->addr;
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tp_event(call->id, entry->ip, 1, entry, size);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(irq_flags);
return 0;
ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
}
/* Kretprobe profile handler */
static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
struct ftrace_event_call *call = &tp->call;
struct kretprobe_trace_entry *entry;
struct trace_entry *ent;
int size, __size, i, pc, __cpu;
int size, __size, i;
unsigned long irq_flags;
char *trace_buf;
char *raw_data;
int rctx;
pc = preempt_count();
__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
"profile buffer not large enough"))
return 0;
return;
/*
* Protect the non nmi buffer
* This also protects the rcu read side
*/
local_irq_save(irq_flags);
entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
if (!entry)
return;
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
__cpu = smp_processor_id();
if (in_nmi())
trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, __cpu);
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
entry = (struct kretprobe_trace_entry *)raw_data;
ent = &entry->ent;
tracing_generic_entry_update(ent, irq_flags, pc);
ent->type = call->id;
entry->nargs = tp->nr_args;
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(irq_flags);
return 0;
ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
}
static int probe_profile_enable(struct ftrace_event_call *call)
@@ -1408,7 +1319,7 @@ static void probe_profile_disable(struct ftrace_event_call *call)
disable_kprobe(&tp->rp.kp);
}
}
#endif /* CONFIG_EVENT_PROFILE */
#endif /* CONFIG_PERF_EVENTS */
static __kprobes
@@ -1418,10 +1329,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
if (tp->flags & TP_FLAG_TRACE)
kprobe_trace_func(kp, regs);
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
kprobe_profile_func(kp, regs);
#endif /* CONFIG_EVENT_PROFILE */
#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
@@ -1432,10 +1343,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
if (tp->flags & TP_FLAG_TRACE)
kretprobe_trace_func(ri, regs);
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
kretprobe_profile_func(ri, regs);
#endif /* CONFIG_EVENT_PROFILE */
#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
@@ -1464,7 +1375,7 @@ static int register_probe_event(struct trace_probe *tp)
call->regfunc = probe_event_enable;
call->unregfunc = probe_event_disable;
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
call->profile_enable = probe_profile_enable;
call->profile_disable = probe_profile_disable;
#endif
@@ -1523,28 +1434,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3,
static __init int kprobe_trace_self_tests_init(void)
{
int ret;
int ret, warn = 0;
int (*target)(int, int, int, int, int, int);
struct trace_probe *tp;
target = kprobe_trace_selftest_target;
pr_info("Testing kprobe tracing: ");
ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
"$arg1 $arg2 $arg3 $arg4 $stack $stack0");
if (WARN_ON_ONCE(ret))
pr_warning("error enabling function entry\n");
"$stack $stack0 +0($stack)");
if (WARN_ON_ONCE(ret)) {
pr_warning("error on probing function entry.\n");
warn++;
} else {
/* Enable trace point */
tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
if (WARN_ON_ONCE(tp == NULL)) {
pr_warning("error on getting new probe.\n");
warn++;
} else
probe_event_enable(&tp->call);
}
ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
"$retval");
if (WARN_ON_ONCE(ret))
pr_warning("error enabling function return\n");
if (WARN_ON_ONCE(ret)) {
pr_warning("error on probing function return.\n");
warn++;
} else {
/* Enable trace point */
tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
if (WARN_ON_ONCE(tp == NULL)) {
pr_warning("error on getting new probe.\n");
warn++;
} else
probe_event_enable(&tp->call);
}
if (warn)
goto end;
ret = target(1, 2, 3, 4, 5, 6);
cleanup_all_probes();
ret = command_trace_probe("-:testprobe");
if (WARN_ON_ONCE(ret)) {
pr_warning("error on deleting a probe.\n");
warn++;
}
pr_cont("OK\n");
ret = command_trace_probe("-:testprobe2");
if (WARN_ON_ONCE(ret)) {
pr_warning("error on deleting a probe.\n");
warn++;
}
end:
cleanup_all_probes();
if (warn)
pr_cont("NG: Some tests are failed. Please check them.\n");
else
pr_cont("OK\n");
return 0;
}

View File

@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
unsigned long val, flags;
char buf[64];
int ret;
int cpu;
if (count >= sizeof(buf))
return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
return ret;
local_irq_save(flags);
/*
* In case we trace inside arch_spin_lock() or after (NMI),
* we will cause circular lock, so we also need to increase
* the percpu trace_active here.
*/
cpu = smp_processor_id();
per_cpu(trace_active, cpu)++;
arch_spin_lock(&max_stack_lock);
*ptr = val;
arch_spin_unlock(&max_stack_lock);
per_cpu(trace_active, cpu)--;
local_irq_restore(flags);
return count;
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
static void *t_start(struct seq_file *m, loff_t *pos)
{
int cpu;
local_irq_disable();
cpu = smp_processor_id();
per_cpu(trace_active, cpu)++;
arch_spin_lock(&max_stack_lock);
if (*pos == 0)
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
/*
* t_stop - seq_file stop callback; undoes what t_start() set up
* @m: unused
* @p: unused
*
* Releases max_stack_lock, decrements the current cpu's trace_active
* counter and re-enables interrupts, i.e. the reverse of the
* local_irq_disable() / trace_active++ / arch_spin_lock() sequence in
* t_start().
*/
static void t_stop(struct seq_file *m, void *p)
{
int cpu;
arch_spin_unlock(&max_stack_lock);
/* interrupts are still off here, so the cpu id is read before enabling them */
cpu = smp_processor_id();
per_cpu(trace_active, cpu)--;
local_irq_enable();
}

View File

@@ -421,7 +421,7 @@ int __init init_ftrace_syscalls(void)
}
core_initcall(init_ftrace_syscalls);
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
@@ -433,12 +433,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
unsigned long flags;
char *trace_buf;
char *raw_data;
int syscall_nr;
int rctx;
int size;
int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -457,37 +454,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
"profile buffer not large enough"))
return;
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(flags);
rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
sys_data->enter_event->id, &rctx, &flags);
if (!rec)
return;
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
cpu = smp_processor_id();
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
rec = (struct syscall_trace_enter *) raw_data;
tracing_generic_entry_update(&rec->ent, 0, 0);
rec->ent.type = sys_data->enter_event->id;
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(flags);
ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
}
int prof_sysenter_enable(struct ftrace_event_call *call)
@@ -531,11 +506,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
struct syscall_trace_exit *rec;
unsigned long flags;
int syscall_nr;
char *trace_buf;
char *raw_data;
int rctx;
int size;
int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -557,38 +529,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
"exit event has grown above profile buffer size"))
return;
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(flags);
rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
sys_data->exit_event->id, &rctx, &flags);
if (!rec)
return;
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
cpu = smp_processor_id();
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
rec = (struct syscall_trace_exit *)raw_data;
tracing_generic_entry_update(&rec->ent, 0, 0);
rec->ent.type = sys_data->exit_event->id;
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(flags);
ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
}
int prof_sysexit_enable(struct ftrace_event_call *call)
@@ -626,6 +575,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call)
mutex_unlock(&syscall_trace_lock);
}
#endif
#endif /* CONFIG_PERF_EVENTS */