Merge branch 'linus' into x86/mce3

Conflicts:
	arch/x86/kernel/cpu/mcheck/mce_64.c
	arch/x86/kernel/irq.c

Merge reason: Resolve the conflicts above.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Ingo Molnar committed 2009-06-11 23:31:52 +02:00
1275 changed files with 87001 additions and 27373 deletions


@@ -93,8 +93,10 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is


@@ -92,23 +92,18 @@ extern int initcall_debug;
static async_cookie_t __lowest_in_progress(struct list_head *running)
{
struct async_entry *entry;
async_cookie_t ret = next_cookie; /* begin with "infinity" value */
if (!list_empty(running)) {
entry = list_first_entry(running,
struct async_entry, list);
ret = entry->cookie;
return entry->cookie;
}
if (!list_empty(&async_pending)) {
list_for_each_entry(entry, &async_pending, list)
if (entry->running == running) {
ret = entry->cookie;
break;
}
}
list_for_each_entry(entry, &async_pending, list)
if (entry->running == running)
return entry->cookie;
return ret;
return next_cookie; /* "infinity" value */
}
static async_cookie_t lowest_in_progress(struct list_head *running)


@@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
}
asmlinkage long
compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
struct compat_siginfo __user *uinfo)
{
siginfo_t info;
if (copy_siginfo_from_user32(&info, uinfo))
return -EFAULT;
return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
}
#ifdef __ARCH_WANT_COMPAT_SYS_TIME
/* compat_time_t is a 32 bit "long" and needs to get converted. */

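Note: the hunk above adds only the 32-bit compat entry point for the new rt_tgsigqueueinfo() syscall; the native path is do_rt_tgsigqueueinfo(). As a hedged userspace sketch (assuming a libc whose headers already define __NR_rt_tgsigqueueinfo, which was brand new at the time), queueing a siginfo to one thread of another process looks roughly like this:

#define _GNU_SOURCE
#include <signal.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* tgid: thread group (process) id, tid: kernel thread id of the target. */
static int tgsigqueue(pid_t tgid, pid_t tid, int sig, union sigval value)
{
        siginfo_t info;

        memset(&info, 0, sizeof(info));
        info.si_signo = sig;
        info.si_code  = SI_QUEUE;  /* must be negative when targeting other tasks */
        info.si_pid   = getpid();
        info.si_uid   = getuid();
        info.si_value = value;

        return syscall(__NR_rt_tgsigqueueinfo, tgid, tid, sig, &info);
}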

@@ -1857,7 +1857,7 @@ struct cgroup_subsys cpuset_subsys = {
int __init cpuset_init_early(void)
{
alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_NOWAIT);
top_cpuset.mems_generation = cpuset_mems_generation++;
return 0;


@@ -167,7 +167,7 @@ EXPORT_SYMBOL(prepare_creds);
/*
* Prepare credentials for current to perform an execve()
* - The caller must hold current->cred_exec_mutex
* - The caller must hold current->cred_guard_mutex
*/
struct cred *prepare_exec_creds(void)
{
@@ -276,7 +276,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
struct cred *new;
int ret;
mutex_init(&p->cred_exec_mutex);
mutex_init(&p->cred_guard_mutex);
if (
#ifdef CONFIG_KEYS


@@ -48,7 +48,8 @@
#include <linux/tracehook.h>
#include <linux/fs_struct.h>
#include <linux/init_task.h>
#include <trace/sched.h>
#include <linux/perf_counter.h>
#include <trace/events/sched.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -56,10 +57,6 @@
#include <asm/mmu_context.h>
#include "cred-internals.h"
DEFINE_TRACE(sched_process_free);
DEFINE_TRACE(sched_process_exit);
DEFINE_TRACE(sched_process_wait);
static void exit_mm(struct task_struct * tsk);
static void __unhash_process(struct task_struct *p)
@@ -158,6 +155,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
{
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
#ifdef CONFIG_PERF_COUNTERS
WARN_ON_ONCE(tsk->perf_counter_ctxp);
#endif
trace_sched_process_free(tsk);
put_task_struct(tsk);
}
@@ -174,6 +174,7 @@ repeat:
atomic_dec(&__task_cred(p)->user->processes);
proc_flush_task(p);
write_lock_irq(&tasklist_lock);
tracehook_finish_release_task(p);
__exit_signal(p);
@@ -975,16 +976,19 @@ NORET_TYPE void do_exit(long code)
module_put(tsk->binfmt->module);
proc_exit_connector(tsk);
/*
* Flush inherited counters to the parent - before the parent
* gets woken up by child-exit notifications.
*/
perf_counter_exit_task(tsk);
exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
#endif
#ifdef CONFIG_FUTEX
/*
* This must happen late, after the PID is not
* hashed anymore:
*/
if (unlikely(!list_empty(&tsk->pi_state_list)))
exit_pi_state_list(tsk);
if (unlikely(current->pi_state_cache))
@@ -1476,6 +1480,7 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
*/
if (*notask_error)
*notask_error = ret;
return 0;
}
if (likely(!ptrace) && unlikely(p->ptrace)) {


@@ -61,8 +61,8 @@
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
#include <linux/fs_struct.h>
#include <trace/sched.h>
#include <linux/magic.h>
#include <linux/perf_counter.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -71,6 +71,8 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <trace/events/sched.h>
/*
* Protected counters by write_lock_irq(&tasklist_lock)
*/
@@ -83,8 +85,6 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
DEFINE_TRACE(sched_process_fork);
int nr_processes(void)
{
int cpu;
@@ -982,6 +982,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (!p)
goto fork_out;
ftrace_graph_init_task(p);
rt_mutex_init_task(p);
#ifdef CONFIG_PROVE_LOCKING
@@ -1089,12 +1091,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
if (unlikely(current->ptrace))
ptrace_fork(p, clone_flags);
p->bts = NULL;
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);
retval = perf_counter_init_task(p);
if (retval)
goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p)))
goto bad_fork_cleanup_policy;
/* copy all the process information */
@@ -1131,8 +1137,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
}
ftrace_graph_init_task(p);
p->pid = pid_nr(pid);
p->tgid = p->pid;
if (clone_flags & CLONE_THREAD)
@@ -1141,7 +1145,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (current->nsproxy != p->nsproxy) {
retval = ns_cgroup_clone(p, pid);
if (retval)
goto bad_fork_free_graph;
goto bad_fork_free_pid;
}
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1233,7 +1237,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
retval = -ERESTARTNOINTR;
goto bad_fork_free_graph;
goto bad_fork_free_pid;
}
if (clone_flags & CLONE_THREAD) {
@@ -1268,8 +1272,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
cgroup_post_fork(p);
return p;
bad_fork_free_graph:
ftrace_graph_exit_task(p);
bad_fork_free_pid:
if (pid != &init_struct_pid)
free_pid(pid);
@@ -1293,6 +1295,7 @@ bad_fork_cleanup_semundo:
bad_fork_cleanup_audit:
audit_free(p);
bad_fork_cleanup_policy:
perf_counter_free_task(p);
#ifdef CONFIG_NUMA
mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup:
@@ -1406,10 +1409,16 @@ long do_fork(unsigned long clone_flags,
if (clone_flags & CLONE_VFORK) {
p->vfork_done = &vfork;
init_completion(&vfork);
} else if (!(clone_flags & CLONE_VM)) {
/*
* vfork will do an exec which will call
* set_task_comm()
*/
perf_counter_fork(p);
}
audit_finish_fork(p);
tracehook_report_clone(trace, regs, clone_flags, nr, p);
tracehook_report_clone(regs, clone_flags, nr, p);
/*
* We set PF_STARTING at creation in case tracing wants to

File diff suppressed because it is too large.


@@ -18,8 +18,8 @@
#include <linux/kernel_stat.h>
#include <linux/rculist.h>
#include <linux/hash.h>
#include <trace/irq.h>
#include <linux/bootmem.h>
#include <trace/events/irq.h>
#include "internals.h"
@@ -150,6 +150,7 @@ int __init early_irq_init(void)
{
struct irq_desc *desc;
int legacy_count;
int node;
int i;
init_irq_default_affinity();
@@ -160,20 +161,20 @@ int __init early_irq_init(void)
desc = irq_desc_legacy;
legacy_count = ARRAY_SIZE(irq_desc_legacy);
node = first_online_node;
/* allocate irq_desc_ptrs array based on nr_irqs */
irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *));
irq_desc_ptrs = kcalloc(nr_irqs, sizeof(void *), GFP_NOWAIT);
/* allocate based on nr_cpu_ids */
/* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */
kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids *
sizeof(int));
kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids *
sizeof(int), GFP_NOWAIT, node);
for (i = 0; i < legacy_count; i++) {
desc[i].irq = i;
desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
alloc_desc_masks(&desc[i], 0, true);
alloc_desc_masks(&desc[i], node, true);
init_desc_masks(&desc[i]);
irq_desc_ptrs[i] = desc + i;
}
@@ -355,9 +356,6 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
"but no thread function available.", irq, action->name);
}
DEFINE_TRACE(irq_handler_entry);
DEFINE_TRACE(irq_handler_exit);
/**
* handle_IRQ_event - irq action chain handler
* @irq: the interrupt number

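Note: this file, like the cpuset, profile, sched and cpupri hunks elsewhere in this merge, converts early-boot allocations from alloc_bootmem() to ordinary slab calls. The slab allocator is now up at this point in boot, but sleeping still is not, hence GFP_NOWAIT rather than GFP_KERNEL. A minimal sketch of the pattern (early_ptrs and early_table_init are illustrative names, not from this commit):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/slab.h>

static void **early_ptrs;       /* mirrors irq_desc_ptrs above */

void __init early_table_init(unsigned int nr_entries)
{
        /*
         * Early in boot: kzalloc/kcalloc are usable, but we must not
         * sleep, so pass GFP_NOWAIT instead of GFP_KERNEL.
         */
        early_ptrs = kcalloc(nr_entries, sizeof(void *), GFP_NOWAIT);
        BUG_ON(!early_ptrs);
}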

@@ -13,7 +13,7 @@
#include <linux/file.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <trace/sched.h>
#include <trace/events/sched.h>
#define KTHREAD_NICE_LEVEL (-5)
@@ -21,9 +21,6 @@ static DEFINE_SPINLOCK(kthread_create_lock);
static LIST_HEAD(kthread_create_list);
struct task_struct *kthreadd_task;
DEFINE_TRACE(sched_kthread_stop);
DEFINE_TRACE(sched_kthread_stop_ret);
struct kthread_create_info
{
/* Information passed to kthread() from kthreadd. */


@@ -42,12 +42,14 @@
#include <linux/hash.h>
#include <linux/ftrace.h>
#include <linux/stringify.h>
#include <trace/lockdep.h>
#include <asm/sections.h>
#include "lockdep_internals.h"
#define CREATE_TRACE_POINTS
#include <trace/events/lockdep.h>
#ifdef CONFIG_PROVE_LOCKING
int prove_locking = 1;
module_param(prove_locking, int, 0644);
@@ -2935,8 +2937,6 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
}
EXPORT_SYMBOL_GPL(lock_set_class);
DEFINE_TRACE(lock_acquire);
/*
* We are not always called with irqs disabled - do that here,
* and also avoid lockdep recursion:
@@ -2963,8 +2963,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
}
EXPORT_SYMBOL_GPL(lock_acquire);
DEFINE_TRACE(lock_release);
void lock_release(struct lockdep_map *lock, int nested,
unsigned long ip)
{
@@ -3105,6 +3103,8 @@ found_it:
hlock->holdtime_stamp = now;
}
trace_lock_acquired(lock, ip, waittime);
stats = get_lock_stats(hlock_class(hlock));
if (waittime) {
if (hlock->read)
@@ -3120,8 +3120,6 @@ found_it:
lock->ip = ip;
}
DEFINE_TRACE(lock_contended);
void lock_contended(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
@@ -3143,14 +3141,10 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
}
EXPORT_SYMBOL_GPL(lock_contended);
DEFINE_TRACE(lock_acquired);
void lock_acquired(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
trace_lock_acquired(lock, ip);
if (unlikely(!lock_stat))
return;


@@ -18,6 +18,7 @@
*/
#include <linux/module.h>
#include <linux/moduleloader.h>
#include <linux/ftrace_event.h>
#include <linux/init.h>
#include <linux/kallsyms.h>
#include <linux/fs.h>
@@ -52,6 +53,7 @@
#include <linux/ftrace.h>
#include <linux/async.h>
#include <linux/percpu.h>
#include <linux/kmemleak.h>
#if 0
#define DEBUGP printk
@@ -72,6 +74,9 @@ DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
/* Block module loading/unloading? */
int modules_disabled = 0;
/* Waiting for a module to finish initializing? */
static DECLARE_WAIT_QUEUE_HEAD(module_wq);
@@ -429,6 +434,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align,
unsigned long extra;
unsigned int i;
void *ptr;
int cpu;
if (align > PAGE_SIZE) {
printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
@@ -458,6 +464,11 @@ static void *percpu_modalloc(unsigned long size, unsigned long align,
if (!split_block(i, size))
return NULL;
/* add the per-cpu scanning areas */
for_each_possible_cpu(cpu)
kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0,
GFP_KERNEL);
/* Mark allocated */
pcpu_size[i] = -pcpu_size[i];
return ptr;
@@ -472,6 +483,7 @@ static void percpu_modfree(void *freeme)
{
unsigned int i;
void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
int cpu;
/* First entry is core kernel percpu data. */
for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
@@ -483,6 +495,10 @@ static void percpu_modfree(void *freeme)
BUG();
free:
/* remove the per-cpu scanning areas */
for_each_possible_cpu(cpu)
kmemleak_free(freeme + per_cpu_offset(cpu));
/* Merge with previous? */
if (pcpu_size[i-1] >= 0) {
pcpu_size[i-1] += pcpu_size[i];
@@ -777,7 +793,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
char name[MODULE_NAME_LEN];
int ret, forced = 0;
if (!capable(CAP_SYS_MODULE))
if (!capable(CAP_SYS_MODULE) || modules_disabled)
return -EPERM;
if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0)
@@ -1489,9 +1505,6 @@ static void free_module(struct module *mod)
/* Free any allocated parameters. */
destroy_params(mod->kp, mod->num_kp);
/* release any pointers to mcount in this module */
ftrace_release(mod->module_core, mod->core_size);
/* This may be NULL, but that's OK */
module_free(mod, mod->module_init);
kfree(mod->args);
@@ -1878,6 +1891,36 @@ static void *module_alloc_update_bounds(unsigned long size)
return ret;
}
#ifdef CONFIG_DEBUG_KMEMLEAK
static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
Elf_Shdr *sechdrs, char *secstrings)
{
unsigned int i;
/* only scan the sections containing data */
kmemleak_scan_area(mod->module_core, (unsigned long)mod -
(unsigned long)mod->module_core,
sizeof(struct module), GFP_KERNEL);
for (i = 1; i < hdr->e_shnum; i++) {
if (!(sechdrs[i].sh_flags & SHF_ALLOC))
continue;
if (strncmp(secstrings + sechdrs[i].sh_name, ".data", 5) != 0
&& strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0)
continue;
kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr -
(unsigned long)mod->module_core,
sechdrs[i].sh_size, GFP_KERNEL);
}
}
#else
static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
Elf_Shdr *sechdrs, char *secstrings)
{
}
#endif
/* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */
static noinline struct module *load_module(void __user *umod,
@@ -1892,11 +1935,9 @@ static noinline struct module *load_module(void __user *umod,
unsigned int symindex = 0;
unsigned int strindex = 0;
unsigned int modindex, versindex, infoindex, pcpuindex;
unsigned int num_mcount;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
unsigned long *mseg;
mm_segment_t old_fs;
DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2050,6 +2091,12 @@ static noinline struct module *load_module(void __user *umod,
/* Do the allocs. */
ptr = module_alloc_update_bounds(mod->core_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
* leak.
*/
kmemleak_not_leak(ptr);
if (!ptr) {
err = -ENOMEM;
goto free_percpu;
@@ -2058,6 +2105,13 @@ static noinline struct module *load_module(void __user *umod,
mod->module_core = ptr;
ptr = module_alloc_update_bounds(mod->init_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. This block doesn't need to be
* scanned as it contains data and code that will be freed
* after the module is initialized.
*/
kmemleak_ignore(ptr);
if (!ptr && mod->init_size) {
err = -ENOMEM;
goto free_core;
@@ -2088,6 +2142,7 @@ static noinline struct module *load_module(void __user *umod,
}
/* Module has been moved. */
mod = (void *)sechdrs[modindex].sh_addr;
kmemleak_load_module(mod, hdr, sechdrs, secstrings);
#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
@@ -2172,7 +2227,19 @@ static noinline struct module *load_module(void __user *umod,
sizeof(*mod->tracepoints),
&mod->num_tracepoints);
#endif
#ifdef CONFIG_EVENT_TRACING
mod->trace_events = section_objs(hdr, sechdrs, secstrings,
"_ftrace_events",
sizeof(*mod->trace_events),
&mod->num_trace_events);
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
/* sechdrs[0].sh_size is always zero */
mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
"__mcount_loc",
sizeof(*mod->ftrace_callsites),
&mod->num_ftrace_callsites);
#endif
#ifdef CONFIG_MODVERSIONS
if ((mod->num_syms && !mod->crcs)
|| (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2237,11 +2304,6 @@ static noinline struct module *load_module(void __user *umod,
dynamic_debug_setup(debug, num_debug);
}
/* sechdrs[0].sh_size is always zero */
mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
sizeof(*mseg), &num_mcount);
ftrace_init_module(mod, mseg, mseg + num_mcount);
err = module_finalize(hdr, sechdrs, mod);
if (err < 0)
goto cleanup;
@@ -2302,7 +2364,6 @@ static noinline struct module *load_module(void __user *umod,
cleanup:
kobject_del(&mod->mkobj.kobj);
kobject_put(&mod->mkobj.kobj);
ftrace_release(mod->module_core, mod->core_size);
free_unload:
module_unload_free(mod);
#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
@@ -2336,7 +2397,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
int ret = 0;
/* Must have permission */
if (!capable(CAP_SYS_MODULE))
if (!capable(CAP_SYS_MODULE) || modules_disabled)
return -EPERM;
/* Only one module load at a time, please */

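Note: the kmemleak hooks added above follow the usual annotation pattern: tell the leak scanner about memory it did not allocate itself (the per-CPU copies in percpu_modalloc()/percpu_modfree()), and mark blocks whose only reference lives inside the block (kmemleak_not_leak()/kmemleak_ignore() around module_alloc_update_bounds()). A hedged sketch of the alloc/free pairing for a hypothetical area the allocator cannot see ('my_area' is illustrative; in the hunk above it is ptr + per_cpu_offset(cpu)):

#include <linux/gfp.h>
#include <linux/kmemleak.h>

static void my_area_created(void *my_area, size_t size)
{
        /* Register the area with kmemleak: min_count 0 means the area
         * itself is never reported as a leak, but it is still scanned,
         * so pointers stored in it keep their targets referenced. */
        kmemleak_alloc(my_area, size, 0, GFP_KERNEL);
}

static void my_area_destroyed(void *my_area)
{
        /* Must pair with the kmemleak_alloc() above. */
        kmemleak_free(my_area);
}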

@@ -89,7 +89,7 @@ __mutex_lock_slowpath(atomic_t *lock_count);
*
* This function is similar to (but not equivalent to) down().
*/
void inline __sched mutex_lock(struct mutex *lock)
void __sched mutex_lock(struct mutex *lock)
{
might_sleep();
/*
@@ -249,7 +249,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
/* didnt get the lock, go to sleep: */
spin_unlock_mutex(&lock->wait_lock, flags);
__schedule();
preempt_enable_no_resched();
schedule();
preempt_disable();
spin_lock_mutex(&lock->wait_lock, flags);
}
@@ -471,5 +473,28 @@ int __sched mutex_trylock(struct mutex *lock)
return ret;
}
EXPORT_SYMBOL(mutex_trylock);
/**
* atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
* @cnt: the atomic which we are to dec
* @lock: the mutex to return holding if we dec to 0
*
* return true and hold lock if we dec to 0, return false otherwise
*/
int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
{
/* dec if we can't possibly hit 0 */
if (atomic_add_unless(cnt, -1, 1))
return 0;
/* we might hit 0, so take the lock */
mutex_lock(lock);
if (!atomic_dec_and_test(cnt)) {
/* when we actually did the dec, we didn't hit 0 */
mutex_unlock(lock);
return 0;
}
/* we hit 0, and we hold the lock */
return 1;
}
EXPORT_SYMBOL(atomic_dec_and_mutex_lock);

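Note: atomic_dec_and_mutex_lock() above is the mutex counterpart of atomic_dec_and_lock(): the caller gets the mutex back held only when its decrement was the one that reached zero. A minimal usage sketch, with foo, foo_users, foo_lock and foo_destroy() as hypothetical names:

#include <linux/mutex.h>
#include <linux/slab.h>
#include <asm/atomic.h>         /* <linux/atomic.h> in later kernels */

struct foo { int dummy; };

static atomic_t foo_users = ATOMIC_INIT(1);
static DEFINE_MUTEX(foo_lock);

static void foo_destroy(struct foo *f)
{
        kfree(f);
}

static void foo_put(struct foo *f)
{
        /*
         * Fast path: pure atomic decrement while the count cannot hit zero.
         * Slow path: take foo_lock and re-check; returns 1 with the mutex
         * held only if this decrement really dropped the count to zero.
         */
        if (atomic_dec_and_mutex_lock(&foo_users, &foo_lock)) {
                foo_destroy(f);
                mutex_unlock(&foo_lock);
        }
}
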
kernel/perf_counter.c: new file, 4260 lines (diff suppressed because it is too large).


@@ -111,12 +111,6 @@ int __ref profile_init(void)
/* only text is profiled */
prof_len = (_etext - _stext) >> prof_shift;
buffer_bytes = prof_len*sizeof(atomic_t);
if (!slab_is_available()) {
prof_buffer = alloc_bootmem(buffer_bytes);
alloc_bootmem_cpumask_var(&prof_cpu_mask);
cpumask_copy(prof_cpu_mask, cpu_possible_mask);
return 0;
}
if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
return -ENOMEM;


@@ -24,16 +24,6 @@
#include <linux/uaccess.h>
/*
* Initialize a new task whose father had been ptraced.
*
* Called from copy_process().
*/
void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
{
arch_ptrace_fork(child, clone_flags);
}
/*
* ptrace a task: make the debugger its new parent and
* move it to the ptrace list.
@@ -185,10 +175,11 @@ int ptrace_attach(struct task_struct *task)
if (same_thread_group(task, current))
goto out;
/* Protect exec's credential calculations against our interference;
* SUID, SGID and LSM creds get determined differently under ptrace.
/* Protect the target's credential calculations against our
* interference; SUID, SGID and LSM creds get determined differently
* under ptrace.
*/
retval = mutex_lock_interruptible(&task->cred_exec_mutex);
retval = mutex_lock_interruptible(&task->cred_guard_mutex);
if (retval < 0)
goto out;
@@ -232,7 +223,7 @@ repeat:
bad:
write_unlock_irqrestore(&tasklist_lock, flags);
task_unlock(task);
mutex_unlock(&task->cred_exec_mutex);
mutex_unlock(&task->cred_guard_mutex);
out:
return retval;
}
@@ -304,6 +295,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
if (child->ptrace) {
child->exit_code = data;
dead = __ptrace_detach(current, child);
if (!child->exit_state)
wake_up_process(child);
}
write_unlock_irq(&tasklist_lock);


@@ -1356,17 +1356,11 @@ static int rcu_sched_grace_period(void *arg)
rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
ret = 0;
ret = 0; /* unused */
__wait_event_interruptible(rcu_ctrlblk.sched_wq,
rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
ret);
/*
* Signals would prevent us from sleeping, and we cannot
* do much with them in any case. So flush them.
*/
if (ret)
flush_signals(current);
couldsleepnext = 0;
} while (!kthread_should_stop());


@@ -1259,31 +1259,44 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
check_cpu_stall(rsp, rdp);
/* Is the RCU core waiting for a quiescent state from this CPU? */
if (rdp->qs_pending)
if (rdp->qs_pending) {
rdp->n_rp_qs_pending++;
return 1;
}
/* Does this CPU have callbacks ready to invoke? */
if (cpu_has_callbacks_ready_to_invoke(rdp))
if (cpu_has_callbacks_ready_to_invoke(rdp)) {
rdp->n_rp_cb_ready++;
return 1;
}
/* Has RCU gone idle with this CPU needing another grace period? */
if (cpu_needs_another_gp(rsp, rdp))
if (cpu_needs_another_gp(rsp, rdp)) {
rdp->n_rp_cpu_needs_gp++;
return 1;
}
/* Has another RCU grace period completed? */
if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */
if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */
rdp->n_rp_gp_completed++;
return 1;
}
/* Has a new RCU grace period started? */
if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */
if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */
rdp->n_rp_gp_started++;
return 1;
}
/* Has an RCU GP gone long enough to send resched IPIs &c? */
if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0))
((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) {
rdp->n_rp_need_fqs++;
return 1;
}
/* nothing to do */
rdp->n_rp_need_nothing++;
return 0;
}


@@ -213,7 +213,63 @@ static struct file_operations rcugp_fops = {
.release = single_release,
};
static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir;
static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
{
seq_printf(m, "%3d%cnp=%ld "
"qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? '!' : ' ',
rdp->n_rcu_pending,
rdp->n_rp_qs_pending,
rdp->n_rp_cb_ready,
rdp->n_rp_cpu_needs_gp,
rdp->n_rp_gp_completed,
rdp->n_rp_gp_started,
rdp->n_rp_need_fqs,
rdp->n_rp_need_nothing);
}
static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
{
int cpu;
struct rcu_data *rdp;
for_each_possible_cpu(cpu) {
rdp = rsp->rda[cpu];
if (rdp->beenonline)
print_one_rcu_pending(m, rdp);
}
}
static int show_rcu_pending(struct seq_file *m, void *unused)
{
seq_puts(m, "rcu:\n");
print_rcu_pendings(m, &rcu_state);
seq_puts(m, "rcu_bh:\n");
print_rcu_pendings(m, &rcu_bh_state);
return 0;
}
static int rcu_pending_open(struct inode *inode, struct file *file)
{
return single_open(file, show_rcu_pending, NULL);
}
static struct file_operations rcu_pending_fops = {
.owner = THIS_MODULE,
.open = rcu_pending_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static struct dentry *rcudir;
static struct dentry *datadir;
static struct dentry *datadir_csv;
static struct dentry *gpdir;
static struct dentry *hierdir;
static struct dentry *rcu_pendingdir;
static int __init rcuclassic_trace_init(void)
{
rcudir = debugfs_create_dir("rcu", NULL);
@@ -238,6 +294,11 @@ static int __init rcuclassic_trace_init(void)
NULL, &rcuhier_fops);
if (!hierdir)
goto free_out;
rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir,
NULL, &rcu_pending_fops);
if (!rcu_pendingdir)
goto free_out;
return 0;
free_out:
if (datadir)
@@ -257,6 +318,7 @@ static void __exit rcuclassic_trace_cleanup(void)
debugfs_remove(datadir_csv);
debugfs_remove(gpdir);
debugfs_remove(hierdir);
debugfs_remove(rcu_pendingdir);
debugfs_remove(rcudir);
}


@@ -300,7 +300,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* assigned pending owner [which might not have taken the
* lock yet]:
*/
static inline int try_to_steal_lock(struct rt_mutex *lock)
static inline int try_to_steal_lock(struct rt_mutex *lock,
struct task_struct *task)
{
struct task_struct *pendowner = rt_mutex_owner(lock);
struct rt_mutex_waiter *next;
@@ -309,11 +310,11 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
if (!rt_mutex_owner_pending(lock))
return 0;
if (pendowner == current)
if (pendowner == task)
return 1;
spin_lock_irqsave(&pendowner->pi_lock, flags);
if (current->prio >= pendowner->prio) {
if (task->prio >= pendowner->prio) {
spin_unlock_irqrestore(&pendowner->pi_lock, flags);
return 0;
}
@@ -338,21 +339,21 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
* We are going to steal the lock and a waiter was
* enqueued on the pending owners pi_waiters queue. So
* we have to enqueue this waiter into
* current->pi_waiters list. This covers the case,
* where current is boosted because it holds another
* task->pi_waiters list. This covers the case,
* where task is boosted because it holds another
* lock and gets unboosted because the booster is
* interrupted, so we would delay a waiter with higher
* priority as current->normal_prio.
* priority as task->normal_prio.
*
* Note: in the rare case of a SCHED_OTHER task changing
* its priority and thus stealing the lock, next->task
* might be current:
* might be task:
*/
if (likely(next->task != current)) {
spin_lock_irqsave(&current->pi_lock, flags);
plist_add(&next->pi_list_entry, &current->pi_waiters);
__rt_mutex_adjust_prio(current);
spin_unlock_irqrestore(&current->pi_lock, flags);
if (likely(next->task != task)) {
spin_lock_irqsave(&task->pi_lock, flags);
plist_add(&next->pi_list_entry, &task->pi_waiters);
__rt_mutex_adjust_prio(task);
spin_unlock_irqrestore(&task->pi_lock, flags);
}
return 1;
}
@@ -389,7 +390,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
*/
mark_rt_mutex_waiters(lock);
if (rt_mutex_owner(lock) && !try_to_steal_lock(lock))
if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current))
return 0;
/* We got the lock. */
@@ -411,6 +412,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
*/
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task,
int detect_deadlock)
{
struct task_struct *owner = rt_mutex_owner(lock);
@@ -418,21 +420,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
unsigned long flags;
int chain_walk = 0, res;
spin_lock_irqsave(&current->pi_lock, flags);
__rt_mutex_adjust_prio(current);
waiter->task = current;
spin_lock_irqsave(&task->pi_lock, flags);
__rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
plist_node_init(&waiter->list_entry, current->prio);
plist_node_init(&waiter->pi_list_entry, current->prio);
plist_node_init(&waiter->list_entry, task->prio);
plist_node_init(&waiter->pi_list_entry, task->prio);
/* Get the top priority waiter on the lock */
if (rt_mutex_has_waiters(lock))
top_waiter = rt_mutex_top_waiter(lock);
plist_add(&waiter->list_entry, &lock->wait_list);
current->pi_blocked_on = waiter;
task->pi_blocked_on = waiter;
spin_unlock_irqrestore(&current->pi_lock, flags);
spin_unlock_irqrestore(&task->pi_lock, flags);
if (waiter == rt_mutex_top_waiter(lock)) {
spin_lock_irqsave(&owner->pi_lock, flags);
@@ -460,7 +462,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
spin_unlock(&lock->wait_lock);
res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
current);
task);
spin_lock(&lock->wait_lock);
@@ -605,6 +607,85 @@ void rt_mutex_adjust_pi(struct task_struct *task)
rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
}
/**
* __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
* @lock: the rt_mutex to take
* @state: the state the task should block in (TASK_INTERRUPTIBLE
* or TASK_UNINTERRUPTIBLE)
* @timeout: the pre-initialized and started timer, or NULL for none
* @waiter: the pre-initialized rt_mutex_waiter
* @detect_deadlock: passed to task_blocks_on_rt_mutex
*
* lock->wait_lock must be held by the caller.
*/
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
struct rt_mutex_waiter *waiter,
int detect_deadlock)
{
int ret = 0;
for (;;) {
/* Try to acquire the lock: */
if (try_to_take_rt_mutex(lock))
break;
/*
* TASK_INTERRUPTIBLE checks for signals and
* timeout. Ignored otherwise.
*/
if (unlikely(state == TASK_INTERRUPTIBLE)) {
/* Signal pending? */
if (signal_pending(current))
ret = -EINTR;
if (timeout && !timeout->task)
ret = -ETIMEDOUT;
if (ret)
break;
}
/*
* waiter->task is NULL the first time we come here and
* when we have been woken up by the previous owner
* but the lock got stolen by a higher prio task.
*/
if (!waiter->task) {
ret = task_blocks_on_rt_mutex(lock, waiter, current,
detect_deadlock);
/*
* If we got woken up by the owner then start loop
* all over without going into schedule to try
* to get the lock now:
*/
if (unlikely(!waiter->task)) {
/*
* Reset the return value. We might
* have returned with -EDEADLK and the
* owner released the lock while we
* were walking the pi chain.
*/
ret = 0;
continue;
}
if (unlikely(ret))
break;
}
spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
if (waiter->task)
schedule_rt_mutex(lock);
spin_lock(&lock->wait_lock);
set_current_state(state);
}
return ret;
}
/*
* Slow path lock function:
*/
@@ -636,62 +717,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
timeout->task = NULL;
}
for (;;) {
/* Try to acquire the lock: */
if (try_to_take_rt_mutex(lock))
break;
/*
* TASK_INTERRUPTIBLE checks for signals and
* timeout. Ignored otherwise.
*/
if (unlikely(state == TASK_INTERRUPTIBLE)) {
/* Signal pending? */
if (signal_pending(current))
ret = -EINTR;
if (timeout && !timeout->task)
ret = -ETIMEDOUT;
if (ret)
break;
}
/*
* waiter.task is NULL the first time we come here and
* when we have been woken up by the previous owner
* but the lock got stolen by a higher prio task.
*/
if (!waiter.task) {
ret = task_blocks_on_rt_mutex(lock, &waiter,
detect_deadlock);
/*
* If we got woken up by the owner then start loop
* all over without going into schedule to try
* to get the lock now:
*/
if (unlikely(!waiter.task)) {
/*
* Reset the return value. We might
* have returned with -EDEADLK and the
* owner released the lock while we
* were walking the pi chain.
*/
ret = 0;
continue;
}
if (unlikely(ret))
break;
}
spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(&waiter);
if (waiter.task)
schedule_rt_mutex(lock);
spin_lock(&lock->wait_lock);
set_current_state(state);
}
ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
detect_deadlock);
set_current_state(TASK_RUNNING);
@@ -864,9 +891,9 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
/**
* rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible
* the timeout structure is provided
* by the caller
* rt_mutex_timed_lock - lock a rt_mutex interruptible
* the timeout structure is provided
* by the caller
*
* @lock: the rt_mutex to be locked
* @timeout: timeout structure or NULL (no timeout)
@@ -913,7 +940,7 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock)
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
/***
/**
* rt_mutex_destroy - mark a mutex unusable
* @lock: the mutex to be destroyed
*
@@ -985,6 +1012,59 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
rt_mutex_deadlock_account_unlock(proxy_owner);
}
/**
* rt_mutex_start_proxy_lock() - Start lock acquisition for another task
* @lock: the rt_mutex to take
* @waiter: the pre-initialized rt_mutex_waiter
* @task: the task to prepare
* @detect_deadlock: perform deadlock detection (1) or not (0)
*
* Returns:
* 0 - task blocked on lock
* 1 - acquired the lock for task, caller should wake it up
* <0 - error
*
* Special API call for FUTEX_REQUEUE_PI support.
*/
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task, int detect_deadlock)
{
int ret;
spin_lock(&lock->wait_lock);
mark_rt_mutex_waiters(lock);
if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
/* We got the lock for task. */
debug_rt_mutex_lock(lock);
rt_mutex_set_owner(lock, task, 0);
rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
if (ret && !waiter->task) {
/*
* Reset the return value. We might have
* returned with -EDEADLK and the owner
* released the lock while we were walking the
* pi chain. Let the waiter sort it out.
*/
ret = 0;
}
spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
return ret;
}
/**
* rt_mutex_next_owner - return the next owner of the lock
*
@@ -1004,3 +1084,57 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
return rt_mutex_top_waiter(lock)->task;
}
/**
* rt_mutex_finish_proxy_lock() - Complete lock acquisition
* @lock: the rt_mutex we were woken on
* @to: the timeout, null if none. hrtimer should already have
* been started.
* @waiter: the pre-initialized rt_mutex_waiter
* @detect_deadlock: perform deadlock detection (1) or not (0)
*
* Complete the lock acquisition started our behalf by another thread.
*
* Returns:
* 0 - success
* <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
*
* Special API call for PI-futex requeue support
*/
int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *to,
struct rt_mutex_waiter *waiter,
int detect_deadlock)
{
int ret;
spin_lock(&lock->wait_lock);
set_current_state(TASK_INTERRUPTIBLE);
ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter,
detect_deadlock);
set_current_state(TASK_RUNNING);
if (unlikely(waiter->task))
remove_waiter(lock, waiter);
/*
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
* have to fix that up.
*/
fixup_rt_mutex_waiters(lock);
spin_unlock(&lock->wait_lock);
/*
* Readjust priority, when we did not get the lock. We might have been
* the pending owner and boosted. Since we did not take the lock, the
* PI boost has to go.
*/
if (unlikely(ret))
rt_mutex_adjust_prio(current);
return ret;
}

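Note: the two proxy-lock entry points above exist for the futex requeue-PI work: the task doing the requeue starts the rt_mutex acquisition on behalf of the waiter, and the waiter completes it once woken. A compressed sketch of that split, using the return conventions documented above (caller names are illustrative; the prototypes live in the kernel-internal rtmutex_common.h shown in the next hunk, so this is only usable from inside kernel/):

#include <linux/hrtimer.h>
#include <linux/rtmutex.h>
#include <linux/sched.h>
#include "rtmutex_common.h"     /* struct rt_mutex_waiter, proxy-lock prototypes */

/* Side A: runs in the task performing the requeue. */
static int requeue_side(struct rt_mutex *lock, struct rt_mutex_waiter *waiter,
                        struct task_struct *target)
{
        int ret = rt_mutex_start_proxy_lock(lock, waiter, target, 0);

        if (ret == 1)                   /* lock acquired for target ... */
                wake_up_process(target);        /* ... so wake it up */
        return ret;                     /* 0: target blocked, <0: e.g. -EDEADLK */
}

/* Side B: runs in the woken waiter itself to complete the acquisition. */
static int waiter_side(struct rt_mutex *lock, struct hrtimer_sleeper *to,
                       struct rt_mutex_waiter *waiter)
{
        /* 0 on success; -EINTR, -ETIMEDOUT or -EDEADLK otherwise. */
        return rt_mutex_finish_proxy_lock(lock, to, waiter, 0);
}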

@@ -120,6 +120,14 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task,
int detect_deadlock);
extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *to,
struct rt_mutex_waiter *waiter,
int detect_deadlock);
#ifdef CONFIG_DEBUG_RT_MUTEXES
# include "rtmutex-debug.h"


@@ -39,6 +39,7 @@
#include <linux/completion.h>
#include <linux/kernel_stat.h>
#include <linux/debug_locks.h>
#include <linux/perf_counter.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/profile.h>
@@ -68,17 +69,18 @@
#include <linux/pagemap.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
#include <trace/sched.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
#include "sched_cpupri.h"
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
/*
* Convert user-nice values [ -20 ... 0 ... 19 ]
* to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -118,12 +120,6 @@
*/
#define RUNTIME_INF ((u64)~0ULL)
DEFINE_TRACE(sched_wait_task);
DEFINE_TRACE(sched_wakeup);
DEFINE_TRACE(sched_wakeup_new);
DEFINE_TRACE(sched_switch);
DEFINE_TRACE(sched_migrate_task);
#ifdef CONFIG_SMP
static void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -584,6 +580,7 @@ struct rq {
struct load_weight load;
unsigned long nr_load_updates;
u64 nr_switches;
u64 nr_migrations_in;
struct cfs_rq cfs;
struct rt_rq rt;
@@ -630,6 +627,10 @@ struct rq {
struct list_head migration_queue;
#endif
/* calc_load related fields */
unsigned long calc_load_update;
long calc_load_active;
#ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP
int hrtick_csd_pending;
@@ -692,7 +693,7 @@ static inline int cpu_of(struct rq *rq)
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
static inline void update_rq_clock(struct rq *rq)
inline void update_rq_clock(struct rq *rq)
{
rq->clock = sched_clock_cpu(cpu_of(rq));
}
@@ -1728,6 +1729,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
}
#endif
static void calc_load_account_active(struct rq *this_rq);
#include "sched_stats.h"
#include "sched_idletask.c"
#include "sched_fair.c"
@@ -1958,7 +1961,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
clock_offset = old_rq->clock - new_rq->clock;
trace_sched_migrate_task(p, task_cpu(p), new_cpu);
trace_sched_migrate_task(p, new_cpu);
#ifdef CONFIG_SCHEDSTATS
if (p->se.wait_start)
@@ -1967,12 +1970,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
p->se.sleep_start -= clock_offset;
if (p->se.block_start)
p->se.block_start -= clock_offset;
#endif
if (old_cpu != new_cpu) {
schedstat_inc(p, se.nr_migrations);
p->se.nr_migrations++;
new_rq->nr_migrations_in++;
#ifdef CONFIG_SCHEDSTATS
if (task_hot(p, old_rq->clock, NULL))
schedstat_inc(p, se.nr_forced2_migrations);
}
#endif
perf_counter_task_migration(p, new_cpu);
}
p->se.vruntime -= old_cfsrq->min_vruntime -
new_cfsrq->min_vruntime;
@@ -2014,6 +2021,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
return 1;
}
/*
* wait_task_context_switch - wait for a thread to complete at least one
* context switch.
*
* @p must not be current.
*/
void wait_task_context_switch(struct task_struct *p)
{
unsigned long nvcsw, nivcsw, flags;
int running;
struct rq *rq;
nvcsw = p->nvcsw;
nivcsw = p->nivcsw;
for (;;) {
/*
* The runqueue is assigned before the actual context
* switch. We need to take the runqueue lock.
*
* We could check initially without the lock but it is
* very likely that we need to take the lock in every
* iteration.
*/
rq = task_rq_lock(p, &flags);
running = task_running(rq, p);
task_rq_unlock(rq, &flags);
if (likely(!running))
break;
/*
* The switch count is incremented before the actual
* context switch. We thus wait for two switches to be
* sure at least one completed.
*/
if ((p->nvcsw - nvcsw) > 1)
break;
if ((p->nivcsw - nivcsw) > 1)
break;
cpu_relax();
}
}
/*
* wait_task_inactive - wait for a thread to unschedule.
*
@@ -2324,6 +2374,27 @@ static int sched_balance_self(int cpu, int flag)
#endif /* CONFIG_SMP */
/**
* task_oncpu_function_call - call a function on the cpu on which a task runs
* @p: the task to evaluate
* @func: the function to be called
* @info: the function call argument
*
* Calls the function @func when the task is currently running. This might
* be on the current CPU, which just calls the function directly
*/
void task_oncpu_function_call(struct task_struct *p,
void (*func) (void *info), void *info)
{
int cpu;
preempt_disable();
cpu = task_cpu(p);
if (task_curr(p))
smp_call_function_single(cpu, func, info, 1);
preempt_enable();
}
/***
* try_to_wake_up - wake up a thread
* @p: the to-be-woken-up thread
@@ -2458,6 +2529,17 @@ out:
return success;
}
/**
* wake_up_process - Wake up a specific process
* @p: The process to be woken up.
*
* Attempt to wake up the nominated process and move it to the set of runnable
* processes. Returns 1 if the process was woken up, 0 if it was already
* running.
*
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
int wake_up_process(struct task_struct *p)
{
return try_to_wake_up(p, TASK_ALL, 0);
@@ -2480,6 +2562,7 @@ static void __sched_fork(struct task_struct *p)
p->se.exec_start = 0;
p->se.sum_exec_runtime = 0;
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
p->se.last_wakeup = 0;
p->se.avg_overlap = 0;
p->se.start_runtime = 0;
@@ -2710,6 +2793,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
*/
prev_state = prev->state;
finish_arch_switch(prev);
perf_counter_task_sched_in(current, cpu_of(rq));
finish_lock_switch(rq, prev);
#ifdef CONFIG_SMP
if (post_schedule)
@@ -2766,7 +2850,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
* combine the page table reload and the switch backend into
* one hypercall.
*/
arch_enter_lazy_cpu_mode();
arch_start_context_switch(prev);
if (unlikely(!mm)) {
next->active_mm = oldmm;
@@ -2856,19 +2940,81 @@ unsigned long nr_iowait(void)
return sum;
}
unsigned long nr_active(void)
/* Variables and functions for calc_load */
static atomic_long_t calc_load_tasks;
static unsigned long calc_load_update;
unsigned long avenrun[3];
EXPORT_SYMBOL(avenrun);
/**
* get_avenrun - get the load average array
* @loads: pointer to dest load array
* @offset: offset to add
* @shift: shift count to shift the result left
*
* These values are estimates at best, so no need for locking.
*/
void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
{
unsigned long i, running = 0, uninterruptible = 0;
loads[0] = (avenrun[0] + offset) << shift;
loads[1] = (avenrun[1] + offset) << shift;
loads[2] = (avenrun[2] + offset) << shift;
}
for_each_online_cpu(i) {
running += cpu_rq(i)->nr_running;
uninterruptible += cpu_rq(i)->nr_uninterruptible;
static unsigned long
calc_load(unsigned long load, unsigned long exp, unsigned long active)
{
load *= exp;
load += active * (FIXED_1 - exp);
return load >> FSHIFT;
}
/*
* calc_load - update the avenrun load estimates 10 ticks after the
* CPUs have updated calc_load_tasks.
*/
void calc_global_load(void)
{
unsigned long upd = calc_load_update + 10;
long active;
if (time_before(jiffies, upd))
return;
active = atomic_long_read(&calc_load_tasks);
active = active > 0 ? active * FIXED_1 : 0;
avenrun[0] = calc_load(avenrun[0], EXP_1, active);
avenrun[1] = calc_load(avenrun[1], EXP_5, active);
avenrun[2] = calc_load(avenrun[2], EXP_15, active);
calc_load_update += LOAD_FREQ;
}
/*
* Either called from update_cpu_load() or from a cpu going idle
*/
static void calc_load_account_active(struct rq *this_rq)
{
long nr_active, delta;
nr_active = this_rq->nr_running;
nr_active += (long) this_rq->nr_uninterruptible;
if (nr_active != this_rq->calc_load_active) {
delta = nr_active - this_rq->calc_load_active;
this_rq->calc_load_active = nr_active;
atomic_long_add(delta, &calc_load_tasks);
}
}
if (unlikely((long)uninterruptible < 0))
uninterruptible = 0;
return running + uninterruptible;
/*
* Externally visible per-cpu scheduler statistics:
* cpu_nr_migrations(cpu) - number of migrations into that cpu
*/
u64 cpu_nr_migrations(int cpu)
{
return cpu_rq(cpu)->nr_migrations_in;
}
/*
@@ -2899,6 +3045,11 @@ static void update_cpu_load(struct rq *this_rq)
new_load += scale-1;
this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
}
if (time_after_eq(jiffies, this_rq->calc_load_update)) {
this_rq->calc_load_update += LOAD_FREQ;
calc_load_account_active(this_rq);
}
}
#ifdef CONFIG_SMP
@@ -4240,10 +4391,126 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
static struct {
atomic_t load_balancer;
cpumask_var_t cpu_mask;
cpumask_var_t ilb_grp_nohz_mask;
} nohz ____cacheline_aligned = {
.load_balancer = ATOMIC_INIT(-1),
};
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
/**
* lowest_flag_domain - Return lowest sched_domain containing flag.
* @cpu: The cpu whose lowest level of sched domain is to
* be returned.
* @flag: The flag to check for the lowest sched_domain
* for the given cpu.
*
* Returns the lowest sched_domain of a cpu which contains the given flag.
*/
static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
{
struct sched_domain *sd;
for_each_domain(cpu, sd)
if (sd && (sd->flags & flag))
break;
return sd;
}
/**
* for_each_flag_domain - Iterates over sched_domains containing the flag.
* @cpu: The cpu whose domains we're iterating over.
* @sd: variable holding the value of the power_savings_sd
* for cpu.
* @flag: The flag to filter the sched_domains to be iterated.
*
* Iterates over all the scheduler domains for a given cpu that has the 'flag'
* set, starting from the lowest sched_domain to the highest.
*/
#define for_each_flag_domain(cpu, sd, flag) \
for (sd = lowest_flag_domain(cpu, flag); \
(sd && (sd->flags & flag)); sd = sd->parent)
/**
* is_semi_idle_group - Checks if the given sched_group is semi-idle.
* @ilb_group: group to be checked for semi-idleness
*
* Returns: 1 if the group is semi-idle. 0 otherwise.
*
* We define a sched_group to be semi idle if it has atleast one idle-CPU
* and atleast one non-idle CPU. This helper function checks if the given
* sched_group is semi-idle or not.
*/
static inline int is_semi_idle_group(struct sched_group *ilb_group)
{
cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
sched_group_cpus(ilb_group));
/*
* A sched_group is semi-idle when it has atleast one busy cpu
* and atleast one idle cpu.
*/
if (cpumask_empty(nohz.ilb_grp_nohz_mask))
return 0;
if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
return 0;
return 1;
}
/**
* find_new_ilb - Finds the optimum idle load balancer for nomination.
* @cpu: The cpu which is nominating a new idle_load_balancer.
*
* Returns: Returns the id of the idle load balancer if it exists,
* Else, returns >= nr_cpu_ids.
*
* This algorithm picks the idle load balancer such that it belongs to a
* semi-idle powersavings sched_domain. The idea is to try and avoid
* completely idle packages/cores just for the purpose of idle load balancing
* when there are other idle cpu's which are better suited for that job.
*/
static int find_new_ilb(int cpu)
{
struct sched_domain *sd;
struct sched_group *ilb_group;
/*
* Have idle load balancer selection from semi-idle packages only
* when power-aware load balancing is enabled
*/
if (!(sched_smt_power_savings || sched_mc_power_savings))
goto out_done;
/*
* Optimize for the case when we have no idle CPUs or only one
* idle CPU. Don't walk the sched_domain hierarchy in such cases
*/
if (cpumask_weight(nohz.cpu_mask) < 2)
goto out_done;
for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
ilb_group = sd->groups;
do {
if (is_semi_idle_group(ilb_group))
return cpumask_first(nohz.ilb_grp_nohz_mask);
ilb_group = ilb_group->next;
} while (ilb_group != sd->groups);
}
out_done:
return cpumask_first(nohz.cpu_mask);
}
#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
static inline int find_new_ilb(int call_cpu)
{
return cpumask_first(nohz.cpu_mask);
}
#endif
/*
* This routine will try to nominate the ilb (idle load balancing)
* owner among the cpus whose ticks are stopped. ilb owner will do the idle
@@ -4298,8 +4565,24 @@ int select_nohz_load_balancer(int stop_tick)
/* make me the ilb owner */
if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
return 1;
} else if (atomic_read(&nohz.load_balancer) == cpu)
} else if (atomic_read(&nohz.load_balancer) == cpu) {
int new_ilb;
if (!(sched_smt_power_savings ||
sched_mc_power_savings))
return 1;
/*
* Check to see if there is a more power-efficient
* ilb.
*/
new_ilb = find_new_ilb(cpu);
if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
atomic_set(&nohz.load_balancer, -1);
resched_cpu(new_ilb);
return 0;
}
return 1;
}
} else {
if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
return 0;
@@ -4468,15 +4751,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
}
if (atomic_read(&nohz.load_balancer) == -1) {
/*
* simple selection for now: Nominate the
* first cpu in the nohz list to be the next
* ilb owner.
*
* TBD: Traverse the sched domains and nominate
* the nearest cpu in the nohz.cpu_mask.
*/
int ilb = cpumask_first(nohz.cpu_mask);
int ilb = find_new_ilb(cpu);
if (ilb < nr_cpu_ids)
resched_cpu(ilb);
@@ -4840,6 +5115,8 @@ void scheduler_tick(void)
curr->sched_class->task_tick(rq, curr, 0);
spin_unlock(&rq->lock);
perf_counter_task_tick(curr, cpu);
#ifdef CONFIG_SMP
rq->idle_at_tick = idle_cpu(cpu);
trigger_load_balance(rq, cpu);
@@ -5007,13 +5284,15 @@ pick_next_task(struct rq *rq)
/*
* schedule() is the main scheduler function.
*/
asmlinkage void __sched __schedule(void)
asmlinkage void __sched schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
int cpu;
need_resched:
preempt_disable();
cpu = smp_processor_id();
rq = cpu_rq(cpu);
rcu_qsctr_inc(cpu);
@@ -5053,6 +5332,7 @@ need_resched_nonpreemptible:
if (likely(prev != next)) {
sched_info_switch(prev, next);
perf_counter_task_sched_out(prev, next, cpu);
rq->nr_switches++;
rq->curr = next;
@@ -5070,15 +5350,9 @@ need_resched_nonpreemptible:
if (unlikely(reacquire_kernel_lock(current) < 0))
goto need_resched_nonpreemptible;
}
asmlinkage void __sched schedule(void)
{
need_resched:
preempt_disable();
__schedule();
preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
if (need_resched())
goto need_resched;
}
EXPORT_SYMBOL(schedule);
@@ -5221,7 +5495,7 @@ EXPORT_SYMBOL(default_wake_function);
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*/
void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int sync, void *key)
{
wait_queue_t *curr, *next;
@@ -5241,6 +5515,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
* @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
* @key: is directly passed to the wakeup function
*
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
void __wake_up(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
@@ -5279,6 +5556,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
* with each other. This can prevent needless bouncing between CPUs.
*
* On UP it can prevent extra preemption.
*
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
@@ -5315,6 +5595,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
* awakened in the same order in which they were queued.
*
* See also complete_all(), wait_for_completion() and related routines.
*
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
void complete(struct completion *x)
{
@@ -5332,6 +5615,9 @@ EXPORT_SYMBOL(complete);
* @x: holds the state of this particular completion
*
* This will wake up all threads waiting on this particular completion event.
*
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
void complete_all(struct completion *x)
{
@@ -6490,8 +6776,9 @@ void sched_show_task(struct task_struct *p)
#ifdef CONFIG_DEBUG_STACK_USAGE
free = stack_not_used(p);
#endif
printk(KERN_CONT "%5lu %5d %6d\n", free,
task_pid_nr(p), task_pid_nr(p->real_parent));
printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
task_pid_nr(p), task_pid_nr(p->real_parent),
(unsigned long)task_thread_info(p)->flags);
show_stack(p, NULL);
}
@@ -6970,6 +7257,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
}
}
/*
* remove the tasks which were accounted by rq from calc_load_tasks.
*/
static void calc_global_load_remove(struct rq *rq)
{
atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
}
#endif /* CONFIG_HOTPLUG_CPU */
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -7204,6 +7499,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
/* Update our root-domain */
rq = cpu_rq(cpu);
spin_lock_irqsave(&rq->lock, flags);
rq->calc_load_update = calc_load_update;
rq->calc_load_active = 0;
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -7243,7 +7540,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
cpuset_unlock();
migrate_nr_uninterruptible(rq);
BUG_ON(rq->nr_running != 0);
calc_global_load_remove(rq);
/*
* No need to migrate the tasks: it was best-effort if
* they didn't take sched_hotcpu_mutex. Just wake up
@@ -7279,8 +7576,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
/* Register at highest priority so that task migration (migrate_all_tasks)
* happens before everything else.
/*
* Register at high priority so that task migration (migrate_all_tasks)
* happens before everything else. This has to be lower priority than
* the notifier in the perf_counter subsystem, though.
*/
static struct notifier_block __cpuinitdata migration_notifier = {
.notifier_call = migration_call,
@@ -7525,24 +7824,21 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
{
gfp_t gfp = GFP_KERNEL;
memset(rd, 0, sizeof(*rd));
if (bootmem) {
alloc_bootmem_cpumask_var(&def_root_domain.span);
alloc_bootmem_cpumask_var(&def_root_domain.online);
alloc_bootmem_cpumask_var(&def_root_domain.rto_mask);
cpupri_init(&rd->cpupri, true);
return 0;
}
if (bootmem)
gfp = GFP_NOWAIT;
if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
if (!alloc_cpumask_var(&rd->span, gfp))
goto out;
if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
if (!alloc_cpumask_var(&rd->online, gfp))
goto free_span;
if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
if (!alloc_cpumask_var(&rd->rto_mask, gfp))
goto free_online;
if (cpupri_init(&rd->cpupri, false) != 0)
if (cpupri_init(&rd->cpupri, bootmem) != 0)
goto free_rto_mask;
return 0;
@@ -7753,8 +8049,9 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
/*
* The cpus mask in sched_group and sched_domain hangs off the end.
* FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
* for nr_cpu_ids < CONFIG_NR_CPUS.
*
* ( See the the comments in include/linux/sched.h:struct sched_group
* and struct sched_domain. )
*/
struct static_sched_group {
struct sched_group sg;
@@ -7875,7 +8172,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
struct sched_domain *sd;
sd = &per_cpu(phys_domains, j).sd;
if (j != cpumask_first(sched_group_cpus(sd->groups))) {
if (j != group_first_cpu(sd->groups)) {
/*
* Only add "power" once for each
* physical package.
@@ -7953,7 +8250,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
WARN_ON(!sd || !sd->groups);
if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
if (cpu != group_first_cpu(sd->groups))
return;
child = sd->child;
@@ -8865,7 +9162,7 @@ void __init sched_init(void)
* we use alloc_bootmem().
*/
if (alloc_size) {
ptr = (unsigned long)alloc_bootmem(alloc_size);
ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
#ifdef CONFIG_FAIR_GROUP_SCHED
init_task_group.se = (struct sched_entity **)ptr;
@@ -8938,6 +9235,8 @@ void __init sched_init(void)
rq = cpu_rq(i);
spin_lock_init(&rq->lock);
rq->nr_running = 0;
rq->calc_load_active = 0;
rq->calc_load_update = jiffies + LOAD_FREQ;
init_cfs_rq(&rq->cfs, rq);
init_rt_rq(&rq->rt, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8958,7 +9257,7 @@ void __init sched_init(void)
* 1024) and two child groups A0 and A1 (of weight 1024 each),
* then A0's share of the cpu resource is:
*
* A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
* A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
*
* We achieve this by letting init_task_group's tasks sit
* directly in rq->cfs (i.e init_task_group->se[] = NULL).
@@ -9045,20 +9344,26 @@ void __init sched_init(void)
* when this runqueue becomes "idle".
*/
init_idle(current, smp_processor_id());
calc_load_update = jiffies + LOAD_FREQ;
/*
* During early bootup we pretend to be a normal task:
*/
current->sched_class = &fair_sched_class;
/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
alloc_bootmem_cpumask_var(&nohz_cpu_mask);
alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
#ifdef CONFIG_SMP
#ifdef CONFIG_NO_HZ
alloc_bootmem_cpumask_var(&nohz.cpu_mask);
alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
#endif
alloc_bootmem_cpumask_var(&cpu_isolated_map);
alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
#endif /* SMP */
perf_counter_init();
scheduler_running = 1;
}
@@ -9800,6 +10105,13 @@ static int sched_rt_global_constraints(void)
if (sysctl_sched_rt_period <= 0)
return -EINVAL;
/*
* There are always some RT tasks in the root group
* -- migration, kstopmachine etc.
*/
if (sysctl_sched_rt_runtime == 0)
return -EBUSY;
spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
for_each_possible_cpu(i) {
struct rt_rq *rt_rq = &cpu_rq(i)->rt;


@@ -154,8 +154,12 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
*/
int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
{
gfp_t gfp = GFP_KERNEL;
int i;
if (bootmem)
gfp = GFP_NOWAIT;
memset(cp, 0, sizeof(*cp));
for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
@@ -163,9 +167,7 @@ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
spin_lock_init(&vec->lock);
vec->count = 0;
if (bootmem)
alloc_bootmem_cpumask_var(&vec->mask);
else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL))
if (!zalloc_cpumask_var(&vec->mask, gfp))
goto cleanup;
}


@@ -1487,17 +1487,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
find_matching_se(&se, &pse);
while (se) {
BUG_ON(!pse);
BUG_ON(!pse);
if (wakeup_preempt_entity(se, pse) == 1) {
resched_task(curr);
break;
}
se = parent_entity(se);
pse = parent_entity(pse);
}
if (wakeup_preempt_entity(se, pse) == 1)
resched_task(curr);
}
static struct task_struct *pick_next_task_fair(struct rq *rq)


@@ -22,7 +22,8 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sy
static struct task_struct *pick_next_task_idle(struct rq *rq)
{
schedstat_inc(rq, sched_goidle);
/* adjust the active tasks as we might go into a long sleep */
calc_load_account_active(rq);
return rq->idle;
}


@@ -1591,7 +1591,7 @@ static inline void init_sched_rt_class(void)
unsigned int i;
for_each_possible_cpu(i)
alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
GFP_KERNEL, cpu_to_node(i));
}
#endif /* CONFIG_SMP */


@@ -27,7 +27,7 @@
#include <linux/freezer.h>
#include <linux/pid_namespace.h>
#include <linux/nsproxy.h>
#include <trace/sched.h>
#include <trace/events/sched.h>
#include <asm/param.h>
#include <asm/uaccess.h>
@@ -41,8 +41,6 @@
static struct kmem_cache *sigqueue_cachep;
DEFINE_TRACE(sched_signal_send);
static void __user *sig_handler(struct task_struct *t, int sig)
{
return t->sighand->action[sig - 1].sa.sa_handler;
@@ -249,14 +247,19 @@ void flush_sigqueue(struct sigpending *queue)
/*
* Flush all pending signals for a task.
*/
void __flush_signals(struct task_struct *t)
{
clear_tsk_thread_flag(t, TIF_SIGPENDING);
flush_sigqueue(&t->pending);
flush_sigqueue(&t->signal->shared_pending);
}
void flush_signals(struct task_struct *t)
{
unsigned long flags;
spin_lock_irqsave(&t->sighand->siglock, flags);
clear_tsk_thread_flag(t, TIF_SIGPENDING);
flush_sigqueue(&t->pending);
flush_sigqueue(&t->signal->shared_pending);
__flush_signals(t);
spin_unlock_irqrestore(&t->sighand->siglock, flags);
}
@@ -2278,24 +2281,17 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
return kill_something_info(sig, &info, pid);
}
static int do_tkill(pid_t tgid, pid_t pid, int sig)
static int
do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
{
int error;
struct siginfo info;
struct task_struct *p;
unsigned long flags;
error = -ESRCH;
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_TKILL;
info.si_pid = task_tgid_vnr(current);
info.si_uid = current_uid();
int error = -ESRCH;
rcu_read_lock();
p = find_task_by_vpid(pid);
if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
error = check_kill_permission(sig, &info, p);
error = check_kill_permission(sig, info, p);
/*
* The null signal is a permissions and process existence
* probe. No signal is actually delivered.
@@ -2305,7 +2301,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
* signal is private anyway.
*/
if (!error && sig && lock_task_sighand(p, &flags)) {
error = specific_send_sig_info(sig, &info, p);
error = specific_send_sig_info(sig, info, p);
unlock_task_sighand(p, &flags);
}
}
@@ -2314,6 +2310,19 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
return error;
}
static int do_tkill(pid_t tgid, pid_t pid, int sig)
{
struct siginfo info;
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_TKILL;
info.si_pid = task_tgid_vnr(current);
info.si_uid = current_uid();
return do_send_specific(tgid, pid, sig, &info);
}
/**
* sys_tgkill - send signal to one specific thread
* @tgid: the thread group ID of the thread
@@ -2363,6 +2372,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
return kill_proc_info(sig, &info, pid);
}
long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
{
/* This is only valid for single tasks */
if (pid <= 0 || tgid <= 0)
return -EINVAL;
/* Not even root can pretend to send signals from the kernel.
Nor can they impersonate a kill(), which adds source info. */
if (info->si_code >= 0)
return -EPERM;
info->si_signo = sig;
return do_send_specific(tgid, pid, sig, info);
}
SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
siginfo_t __user *, uinfo)
{
siginfo_t info;
if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
return -EFAULT;
return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
}
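Since do_rt_tgsigqueueinfo() above rejects any siginfo whose si_code is >= 0, user space has to pass a negative, user-reserved code such as SI_QUEUE. A hedged sketch of calling the new syscall directly; it assumes __NR_rt_tgsigqueueinfo is provided by the headers of a kernel carrying this patch, since no glibc wrapper exists yet:

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Queue SIGUSR1 with a payload to one specific thread of a thread group. */
static long send_to_thread(pid_t tgid, pid_t tid, int payload)
{
	siginfo_t info;

	memset(&info, 0, sizeof(info));
	info.si_signo = SIGUSR1;
	info.si_code  = SI_QUEUE;	/* must be negative, see the check above */
	info.si_pid   = getpid();
	info.si_uid   = getuid();
	info.si_value.sival_int = payload;

	/* __NR_rt_tgsigqueueinfo is an assumption about the installed headers. */
	return syscall(__NR_rt_tgsigqueueinfo, tgid, tid, SIGUSR1, &info);
}

int main(void)
{
	signal(SIGUSR1, SIG_IGN);	/* keep the demo from terminating itself */

	if (send_to_thread(getpid(), getpid(), 42) < 0)
		perror("rt_tgsigqueueinfo");
	return 0;
}

Passing a non-negative si_code would come back as -EPERM, which is exactly the impersonation check quoted above.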
int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
{
struct task_struct *t = current;


@@ -372,8 +372,8 @@ static int slow_work_thread(void *_data)
vsmax *= atomic_read(&slow_work_thread_count);
vsmax /= 100;
prepare_to_wait(&slow_work_thread_wq, &wait,
TASK_INTERRUPTIBLE);
prepare_to_wait_exclusive(&slow_work_thread_wq, &wait,
TASK_INTERRUPTIBLE);
if (!freezing(current) &&
!slow_work_threads_should_exit &&
!slow_work_available(vsmax) &&


@@ -52,7 +52,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
cpu_to_node(cpu)))
return NOTIFY_BAD;
break;


@@ -24,7 +24,9 @@
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/tick.h>
#include <trace/irq.h>
#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>
#include <asm/irq.h>
/*
@@ -186,9 +188,6 @@ EXPORT_SYMBOL(local_bh_enable_ip);
*/
#define MAX_SOFTIRQ_RESTART 10
DEFINE_TRACE(softirq_entry);
DEFINE_TRACE(softirq_exit);
asmlinkage void __do_softirq(void)
{
struct softirq_action *h;


@@ -14,6 +14,7 @@
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/perf_counter.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
@@ -1793,6 +1794,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_TSC:
error = SET_TSC_CTL(arg2);
break;
case PR_TASK_PERF_COUNTERS_DISABLE:
error = perf_counter_task_disable();
break;
case PR_TASK_PERF_COUNTERS_ENABLE:
error = perf_counter_task_enable();
break;
case PR_GET_TIMERSLACK:
error = current->timer_slack_ns;
break;


@@ -175,3 +175,6 @@ cond_syscall(compat_sys_timerfd_settime);
cond_syscall(compat_sys_timerfd_gettime);
cond_syscall(sys_eventfd);
cond_syscall(sys_eventfd2);
/* performance counters: */
cond_syscall(sys_perf_counter_open);


@@ -49,6 +49,7 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/slow-work.h>
#include <linux/perf_counter.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
@@ -114,6 +115,7 @@ static int ngroups_max = NGROUPS_MAX;
#ifdef CONFIG_MODULES
extern char modprobe_path[];
extern int modules_disabled;
#endif
#ifdef CONFIG_CHR_DEV_SG
extern int sg_big_buff;
@@ -534,6 +536,17 @@ static struct ctl_table kern_table[] = {
.proc_handler = &proc_dostring,
.strategy = &sysctl_string,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "modules_disabled",
.data = &modules_disabled,
.maxlen = sizeof(int),
.mode = 0644,
/* only handle a transition from default "0" to "1" */
.proc_handler = &proc_dointvec_minmax,
.extra1 = &one,
.extra2 = &one,
},
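The modules_disabled entry above pairs proc_dointvec_minmax with extra1 == extra2 == &one, so the only value the handler will accept is 1: the knob can move from its default 0 to 1 but never back, which is the one-way transition the comment describes. A small user-space sketch of flipping the latch; the /proc path is inferred from the sysctl name:

#include <stdio.h>

int main(void)
{
	/* Path inferred from the "modules_disabled" procname above. */
	const char *path = "/proc/sys/kernel/modules_disabled";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/*
	 * Writing 1 latches the knob; any other value falls outside the
	 * [1, 1] range enforced by extra1/extra2 and is refused, so there
	 * is no way back short of a reboot.
	 */
	fprintf(f, "1\n");
	return fclose(f) ? 1 : 0;
}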
#endif
#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
{
@@ -729,6 +742,14 @@ static struct ctl_table kern_table[] = {
.mode = 0444,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "bootloader_version",
.data = &bootloader_version,
.maxlen = sizeof (int),
.mode = 0444,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kstack_depth_to_print",
@@ -912,6 +933,32 @@ static struct ctl_table kern_table[] = {
.child = slow_work_sysctls,
},
#endif
#ifdef CONFIG_PERF_COUNTERS
{
.ctl_name = CTL_UNNUMBERED,
.procname = "perf_counter_paranoid",
.data = &sysctl_perf_counter_paranoid,
.maxlen = sizeof(sysctl_perf_counter_paranoid),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "perf_counter_mlock_kb",
.data = &sysctl_perf_counter_mlock,
.maxlen = sizeof(sysctl_perf_counter_mlock),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "perf_counter_max_sample_rate",
.data = &sysctl_perf_counter_sample_rate,
.maxlen = sizeof(sysctl_perf_counter_sample_rate),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
@@ -1225,7 +1272,6 @@ static struct ctl_table vm_table[] = {
.strategy = &sysctl_jiffies,
},
#endif
#ifdef CONFIG_SECURITY
{
.ctl_name = CTL_UNNUMBERED,
.procname = "mmap_min_addr",
@@ -1234,7 +1280,6 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &proc_doulongvec_minmax,
},
#endif
#ifdef CONFIG_NUMA
{
.ctl_name = CTL_UNNUMBERED,


@@ -402,9 +402,6 @@ int clocksource_register(struct clocksource *c)
unsigned long flags;
int ret;
/* save mult_orig on registration */
c->mult_orig = c->mult;
spin_lock_irqsave(&clocksource_lock, flags);
ret = clocksource_enqueue(c);
if (!ret)


@@ -22,7 +22,7 @@
/*
* This read-write spinlock protects us from races in SMP while
* playing with xtime and avenrun.
* playing with xtime.
*/
__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
@@ -77,6 +77,10 @@ static void clocksource_forward_now(void)
clock->cycle_last = cycle_now;
nsec = cyc2ns(clock, cycle_delta);
/* If arch requires, add in gettimeoffset() */
nsec += arch_gettimeoffset();
timespec_add_ns(&xtime, nsec);
nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
@@ -111,6 +115,9 @@ void getnstimeofday(struct timespec *ts)
/* convert to nanoseconds: */
nsecs = cyc2ns(clock, cycle_delta);
/* If arch requires, add in gettimeoffset() */
nsecs += arch_gettimeoffset();
} while (read_seqretry(&xtime_lock, seq));
timespec_add_ns(ts, nsecs);


@@ -37,6 +37,7 @@
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/perf_counter.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -1123,47 +1124,6 @@ void update_process_times(int user_tick)
run_posix_cpu_timers(p);
}
/*
* Nr of active tasks - counted in fixed-point numbers
*/
static unsigned long count_active_tasks(void)
{
return nr_active() * FIXED_1;
}
/*
* Hmm.. Changed this, as the GNU make sources (load.c) seems to
* imply that avenrun[] is the standard name for this kind of thing.
* Nothing else seems to be standardized: the fractional size etc
* all seem to differ on different machines.
*
* Requires xtime_lock to access.
*/
unsigned long avenrun[3];
EXPORT_SYMBOL(avenrun);
/*
* calc_load - given tick count, update the avenrun load estimates.
* This is called while holding a write_lock on xtime_lock.
*/
static inline void calc_load(unsigned long ticks)
{
unsigned long active_tasks; /* fixed-point */
static int count = LOAD_FREQ;
count -= ticks;
if (unlikely(count < 0)) {
active_tasks = count_active_tasks();
do {
CALC_LOAD(avenrun[0], EXP_1, active_tasks);
CALC_LOAD(avenrun[1], EXP_5, active_tasks);
CALC_LOAD(avenrun[2], EXP_15, active_tasks);
count += LOAD_FREQ;
} while (count < 0);
}
}
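The calc_load() being removed above folds the runnable-task count into avenrun[] with the usual fixed-point exponential decay; that bookkeeping now moves behind calc_global_load(). A stand-alone sketch of one CALC_LOAD step; FSHIFT, FIXED_1 and EXP_1 below are the conventional header values and should be read as assumptions here:

#include <stdio.h>

#define FSHIFT	11			/* bits of fractional precision        */
#define FIXED_1	(1 << FSHIFT)		/* 1.0 in fixed point                  */
#define EXP_1	1884			/* 1-min decay, assuming 5 s updates   */

/* One CALC_LOAD step: load = load*exp + active*(1-exp), in fixed point. */
static unsigned long calc_load_step(unsigned long load,
				    unsigned long exp,
				    unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	return load >> FSHIFT;
}

int main(void)
{
	unsigned long load = 0;			/* idle history               */
	unsigned long active = 3 * FIXED_1;	/* 3 runnable tasks (FIXED_1   */
						/* per task, as in the code)  */
	int i;

	for (i = 1; i <= 10; i++) {
		load = calc_load_step(load, EXP_1, active);
		printf("after %2d updates: %lu.%02lu\n", i,
		       load >> FSHIFT,
		       ((load & (FIXED_1 - 1)) * 100) >> FSHIFT);
	}
	return 0;
}

Ten updates with three tasks runnable pull the 1-minute figure from 0.00 toward 3.00, which is the familiar /proc/loadavg behaviour.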
/*
* This function runs timers and the timer-tq in bottom half context.
*/
@@ -1171,6 +1131,8 @@ static void run_timer_softirq(struct softirq_action *h)
{
struct tvec_base *base = __get_cpu_var(tvec_bases);
perf_counter_do_pending();
hrtimer_run_pending();
if (time_after_eq(jiffies, base->timer_jiffies))
@@ -1187,16 +1149,6 @@ void run_local_timers(void)
softlockup_tick();
}
/*
* Called by the timer interrupt. xtime_lock must already be taken
* by the timer IRQ!
*/
static inline void update_times(unsigned long ticks)
{
update_wall_time();
calc_load(ticks);
}
/*
* The 64-bit jiffies value is not atomic - you MUST NOT read it
* without sampling the sequence number in xtime_lock.
@@ -1206,7 +1158,8 @@ static inline void update_times(unsigned long ticks)
void do_timer(unsigned long ticks)
{
jiffies_64 += ticks;
update_times(ticks);
update_wall_time();
calc_global_load();
}
#ifdef __ARCH_WANT_SYS_ALARM
@@ -1407,37 +1360,17 @@ int do_sysinfo(struct sysinfo *info)
{
unsigned long mem_total, sav_total;
unsigned int mem_unit, bitcount;
unsigned long seq;
struct timespec tp;
memset(info, 0, sizeof(struct sysinfo));
do {
struct timespec tp;
seq = read_seqbegin(&xtime_lock);
ktime_get_ts(&tp);
monotonic_to_bootbased(&tp);
info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
/*
* This is annoying. The below is the same thing
* posix_get_clock_monotonic() does, but it wants to
* take the lock which we want to cover the loads stuff
* too.
*/
get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
getnstimeofday(&tp);
tp.tv_sec += wall_to_monotonic.tv_sec;
tp.tv_nsec += wall_to_monotonic.tv_nsec;
monotonic_to_bootbased(&tp);
if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
tp.tv_sec++;
}
info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
info->procs = nr_threads;
} while (read_seqretry(&xtime_lock, seq));
info->procs = nr_threads;
si_meminfo(info);
si_swapinfo(info);


@@ -48,6 +48,21 @@ config FTRACE_NMI_ENTER
depends on HAVE_FTRACE_NMI_ENTER
default y
config EVENT_TRACING
select CONTEXT_SWITCH_TRACER
bool
config CONTEXT_SWITCH_TRACER
select MARKERS
bool
# All tracer options should select GENERIC_TRACER. Options that are enabled
# by all tracers (the context switch and event tracers) select TRACING instead.
# This allows those options to appear when no other tracer is selected, but the
# options do not appear when something else selects them. We need the two
# options GENERIC_TRACER and TRACING to avoid circular dependencies while still
# hiding the automatic options.
config TRACING
bool
select DEBUG_FS
@@ -56,6 +71,11 @@ config TRACING
select TRACEPOINTS
select NOP_TRACER
select BINARY_PRINTF
select EVENT_TRACING
config GENERIC_TRACER
bool
select TRACING
#
# Minimum requirements an architecture has to meet for us to
@@ -73,14 +93,20 @@ config TRACING_SUPPORT
if TRACING_SUPPORT
menu "Tracers"
menuconfig FTRACE
bool "Tracers"
default y if DEBUG_KERNEL
help
Enable the kernel tracing infrastructure.
if FTRACE
config FUNCTION_TRACER
bool "Kernel Function Tracer"
depends on HAVE_FUNCTION_TRACER
select FRAME_POINTER
select KALLSYMS
select TRACING
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
help
Enable the kernel to trace every kernel function. This is done
@@ -104,13 +130,14 @@ config FUNCTION_GRAPH_TRACER
the return value. This is done by setting the current return
address on the current task structure into a stack of calls.
config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
default n
depends on TRACE_IRQFLAGS_SUPPORT
depends on GENERIC_TIME
select TRACE_IRQFLAGS
select TRACING
select GENERIC_TRACER
select TRACER_MAX_TRACE
help
This option measures the time spent in irqs-off critical
@@ -131,7 +158,7 @@ config PREEMPT_TRACER
default n
depends on GENERIC_TIME
depends on PREEMPT
select TRACING
select GENERIC_TRACER
select TRACER_MAX_TRACE
help
This option measures the time spent in preemption off critical
@@ -150,7 +177,7 @@ config PREEMPT_TRACER
config SYSPROF_TRACER
bool "Sysprof Tracer"
depends on X86
select TRACING
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
help
This tracer provides the trace needed by the 'Sysprof' userspace
@@ -158,40 +185,33 @@ config SYSPROF_TRACER
config SCHED_TRACER
bool "Scheduling Latency Tracer"
select TRACING
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
select TRACER_MAX_TRACE
help
This tracer tracks the latency of the highest priority task
to be scheduled in, starting from the point it has woken up.
config CONTEXT_SWITCH_TRACER
bool "Trace process context switches"
select TRACING
select MARKERS
help
This tracer gets called from the context switch and records
all switching of tasks.
config EVENT_TRACER
bool "Trace various events in the kernel"
config ENABLE_DEFAULT_TRACERS
bool "Trace process context switches and events"
depends on !GENERIC_TRACER
select TRACING
help
This tracer hooks to various trace points in the kernel
allowing the user to pick and choose which trace point they
want to trace.
want to trace. It also includes the sched_switch tracer plugin.
config FTRACE_SYSCALLS
bool "Trace syscalls"
depends on HAVE_FTRACE_SYSCALLS
select TRACING
select GENERIC_TRACER
select KALLSYMS
help
Basic tracer to catch the syscall entry and exit events.
config BOOT_TRACER
bool "Trace boot initcalls"
select TRACING
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
help
This tracer helps developers to optimize boot times: it records
@@ -207,8 +227,36 @@ config BOOT_TRACER
to enable this on bootup.
config TRACE_BRANCH_PROFILING
bool
select GENERIC_TRACER
choice
prompt "Branch Profiling"
default BRANCH_PROFILE_NONE
help
Branch profiling is a software profiler. It adds hooks
into the C conditionals to test which path a branch takes.
The likely/unlikely profiler only looks at the conditions that
are annotated with a likely or unlikely macro.
The "all branch" profiler will profile every if statement in the
kernel. This profiler also enables the likely/unlikely
profiler.
Either of the above profilers adds a bit of overhead to the
system. (A short likely()/unlikely() example follows this choice block.)
If unsure choose "No branch profiling".
config BRANCH_PROFILE_NONE
bool "No branch profiling"
help
No branch profiling. Branch profiling adds a bit of overhead.
Only enable it if you want to analyse the branching behavior.
Otherwise keep it disabled.
config PROFILE_ANNOTATED_BRANCHES
bool "Trace likely/unlikely profiler"
select TRACING
select TRACE_BRANCH_PROFILING
help
This tracer profiles all the likely and unlikely macros
in the kernel. It will display the results in:
@@ -218,11 +266,9 @@ config TRACE_BRANCH_PROFILING
Note: this will add a significant overhead, only turn this
on if you need to profile the system's use of these macros.
Say N if unsure.
config PROFILE_ALL_BRANCHES
bool "Profile all if conditionals"
depends on TRACE_BRANCH_PROFILING
select TRACE_BRANCH_PROFILING
help
This tracer profiles all branch conditions. Every if ()
taken in the kernel is recorded whether it hit or miss.
@@ -230,11 +276,12 @@ config PROFILE_ALL_BRANCHES
/debugfs/tracing/profile_branch
This option also enables the likely/unlikely profiler.
This configuration, when enabled, will impose a great overhead
on the system. This should only be enabled when the system
is to be analyzed.
Say N if unsure.
endchoice
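As promised in the Branch Profiling help above, a small annotation example. The annotated-branches profiler records only conditions wrapped in likely()/unlikely(); the "all branch" profiler additionally records plain if () statements (results land in /debugfs/tracing/profile_branch according to the help text). The macros below are user-space stand-ins and the function is invented purely for illustration:

#include <stdio.h>

/* User-space stand-ins for the kernel's branch annotation macros. */
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)

static int process(int len, const void *buf)
{
	if (unlikely(buf == NULL))	/* annotated: rare error path      */
		return -1;

	if (likely(len > 0))		/* annotated: expected fast path   */
		printf("processing %d bytes\n", len);

	if (len > 4096)			/* unannotated: only the "all
					 * branch" profiler counts this    */
		printf("large request\n");

	return 0;
}

int main(void)
{
	char buf[8];

	return process((int)sizeof(buf), buf);
}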
config TRACING_BRANCHES
bool
@@ -261,7 +308,7 @@ config BRANCH_TRACER
config POWER_TRACER
bool "Trace power consumption behavior"
depends on X86
select TRACING
select GENERIC_TRACER
help
This tracer helps developers to analyze and optimize the kernel's
power management decisions, specifically the C-state and P-state
@@ -295,14 +342,14 @@ config STACK_TRACER
config HW_BRANCH_TRACER
depends on HAVE_HW_BRANCH_TRACER
bool "Trace hw branches"
select TRACING
select GENERIC_TRACER
help
This tracer records all branches on the system in a circular
buffer giving access to the last N branches for each cpu.
config KMEMTRACE
bool "Trace SLAB allocations"
select TRACING
select GENERIC_TRACER
help
kmemtrace provides tracing for slab allocator functions, such as
kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
@@ -322,7 +369,7 @@ config KMEMTRACE
config WORKQUEUE_TRACER
bool "Trace workqueues"
select TRACING
select GENERIC_TRACER
help
The workqueue tracer provides some statistical information
about each cpu workqueue thread such as the number of the
@@ -338,7 +385,7 @@ config BLK_DEV_IO_TRACE
select RELAY
select DEBUG_FS
select TRACEPOINTS
select TRACING
select GENERIC_TRACER
select STACKTRACE
help
Say Y here if you want to be able to trace the block layer actions
@@ -375,6 +422,20 @@ config DYNAMIC_FTRACE
were made. If so, it runs stop_machine (stops all CPUS)
and modifies the code to jump over the call to ftrace.
config FUNCTION_PROFILER
bool "Kernel function profiler"
depends on FUNCTION_TRACER
default n
help
This option enables the kernel function profiler. A file is created
in debugfs called function_profile_enabled which defaults to zero.
When a 1 is echoed into this file, profiling begins, and when a
zero is entered, profiling stops. A file in the trace_stats
directory called functions shows the list of functions that
have been hit and their counters.
If in doubt, say N
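A rough user-space sketch of the workflow the help text describes: write 1 to function_profile_enabled, run the workload, then read the per-CPU statistics. The debugfs mount point and the exact stat file names are assumptions (the per-CPU files are registered as "function%d" later in this patch, and the help text calls the directory trace_stats):

#include <stdio.h>

#define TRACE_DIR "/sys/kernel/debug/tracing"	/* assumed debugfs mount */

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	char line[256];
	FILE *f;

	/* Echoing 1 starts profiling, echoing 0 stops it. */
	if (write_str(TRACE_DIR "/function_profile_enabled", "1"))
		perror("enable function profiler");

	/* ... run the workload of interest here ... */

	/* Per-CPU hit counts; directory and file name are assumptions. */
	f = fopen(TRACE_DIR "/trace_stats/function0", "r");
	if (f) {
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
	}

	return write_str(TRACE_DIR "/function_profile_enabled", "0");
}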
config FTRACE_MCOUNT_RECORD
def_bool y
depends on DYNAMIC_FTRACE
@@ -385,7 +446,7 @@ config FTRACE_SELFTEST
config FTRACE_STARTUP_TEST
bool "Perform a startup test on ftrace"
depends on TRACING
depends on GENERIC_TRACER
select FTRACE_SELFTEST
help
This option performs a series of startup tests on ftrace. On bootup
@@ -396,7 +457,7 @@ config FTRACE_STARTUP_TEST
config MMIOTRACE
bool "Memory mapped IO tracing"
depends on HAVE_MMIOTRACE_SUPPORT && PCI
select TRACING
select GENERIC_TRACER
help
Mmiotrace traces Memory Mapped I/O access and is meant for
debugging and reverse engineering. It is called from the ioremap
@@ -416,7 +477,23 @@ config MMIOTRACE_TEST
Say N, unless you absolutely know what you are doing.
endmenu
config RING_BUFFER_BENCHMARK
tristate "Ring buffer benchmark stress tester"
depends on RING_BUFFER
help
This option creates a test to stress the ring buffer and benchmark it.
It creates its own ring buffer such that it will not interfere with
any other users of the ring buffer (such as ftrace). It then creates
a producer and consumer that will run for 10 seconds and sleep for
10 seconds. Each interval it will print out the number of events
it recorded and give a rough estimate of how long each iteration took.
It does not disable interrupts or raise its priority, so it may be
affected by processes that are running.
If unsure, say N
endif # FTRACE
endif # TRACING_SUPPORT


@@ -15,11 +15,17 @@ ifdef CONFIG_TRACING_BRANCHES
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
endif
#
# Make the trace clocks available generally: it's infrastructure
# relied on by ptrace for example:
#
obj-y += trace_clock.o
obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_TRACING) += trace_clock.o
obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_TRACING) += trace_printk.o
@@ -39,12 +45,14 @@ obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
obj-$(CONFIG_POWER_TRACER) += trace_power.o
obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_EVENT_TRACER) += trace_events.o
obj-$(CONFIG_EVENT_TRACER) += events.o
obj-$(CONFIG_EVENT_TRACER) += trace_export.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
ifeq ($(CONFIG_BLOCK),y)
obj-$(CONFIG_EVENT_TRACING) += blktrace.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events.o
obj-$(CONFIG_EVENT_TRACING) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
libftrace-y := ftrace.o


@@ -23,10 +23,14 @@
#include <linux/mutex.h>
#include <linux/debugfs.h>
#include <linux/time.h>
#include <trace/block.h>
#include <linux/uaccess.h>
#include <trace/events/block.h>
#include "trace_output.h"
#ifdef CONFIG_BLK_DEV_IO_TRACE
static unsigned int blktrace_seq __read_mostly = 1;
static struct trace_array *blk_tr;
@@ -147,7 +151,7 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
{
if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
return 1;
if (sector < bt->start_lba || sector > bt->end_lba)
if (sector && (sector < bt->start_lba || sector > bt->end_lba))
return 1;
if (bt->pid && pid != bt->pid)
return 1;
@@ -192,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
what |= MASK_TC_BIT(rw, DISCARD);
pid = tsk->pid;
if (unlikely(act_log_check(bt, what, sector, pid)))
if (act_log_check(bt, what, sector, pid))
return;
cpu = raw_smp_processor_id();
@@ -262,6 +266,7 @@ static void blk_trace_free(struct blk_trace *bt)
{
debugfs_remove(bt->msg_file);
debugfs_remove(bt->dropped_file);
debugfs_remove(bt->dir);
relay_close(bt->rchan);
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
@@ -403,11 +408,29 @@ static struct rchan_callbacks blk_relay_callbacks = {
.remove_buf_file = blk_remove_buf_file_callback,
};
static void blk_trace_setup_lba(struct blk_trace *bt,
struct block_device *bdev)
{
struct hd_struct *part = NULL;
if (bdev)
part = bdev->bd_part;
if (part) {
bt->start_lba = part->start_sect;
bt->end_lba = part->start_sect + part->nr_sects;
} else {
bt->start_lba = 0;
bt->end_lba = -1ULL;
}
}
/*
* Setup everything required to start tracing
*/
int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
struct blk_user_trace_setup *buts)
struct block_device *bdev,
struct blk_user_trace_setup *buts)
{
struct blk_trace *old_bt, *bt = NULL;
struct dentry *dir = NULL;
@@ -480,10 +503,13 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!bt->act_mask)
bt->act_mask = (u16) -1;
bt->start_lba = buts->start_lba;
bt->end_lba = buts->end_lba;
if (!bt->end_lba)
bt->end_lba = -1ULL;
blk_trace_setup_lba(bt, bdev);
/* overwrite with user settings */
if (buts->start_lba)
bt->start_lba = buts->start_lba;
if (buts->end_lba)
bt->end_lba = buts->end_lba;
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
@@ -505,6 +531,7 @@ err:
}
int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
struct block_device *bdev,
char __user *arg)
{
struct blk_user_trace_setup buts;
@@ -514,7 +541,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (ret)
return -EFAULT;
ret = do_blk_trace_setup(q, name, dev, &buts);
ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
if (ret)
return ret;
@@ -582,7 +609,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
switch (cmd) {
case BLKTRACESETUP:
bdevname(bdev, b);
ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
break;
case BLKTRACESTART:
start = 1;
@@ -642,12 +669,12 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
if (blk_pc_request(rq)) {
what |= BLK_TC_ACT(BLK_TC_PC);
__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
rq->cmd_len, rq->cmd);
__blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
what, rq->errors, rq->cmd_len, rq->cmd);
} else {
what |= BLK_TC_ACT(BLK_TC_FS);
__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
rw, what, rq->errors, 0, NULL);
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw,
what, rq->errors, 0, NULL);
}
}
@@ -809,7 +836,6 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
* @bio: the source bio
* @dev: target device
* @from: source sector
* @to: target sector
*
* Description:
* Device mapper or raid targets sometimes need to split a bio because
@@ -817,7 +843,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
*
**/
static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
dev_t dev, sector_t from, sector_t to)
dev_t dev, sector_t from)
{
struct blk_trace *bt = q->blk_trace;
struct blk_io_trace_remap r;
@@ -825,12 +851,13 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
if (likely(!bt))
return;
r.device = cpu_to_be32(dev);
r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
r.sector = cpu_to_be64(to);
r.device_from = cpu_to_be32(dev);
r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev);
r.sector_from = cpu_to_be64(from);
__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE),
sizeof(r), &r);
}
/**
@@ -854,11 +881,11 @@ void blk_add_driver_data(struct request_queue *q,
return;
if (blk_pc_request(rq))
__blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
rq->errors, len, data);
__blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
BLK_TA_DRV_DATA, rq->errors, len, data);
else
__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
0, BLK_TA_DRV_DATA, rq->errors, len, data);
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0,
BLK_TA_DRV_DATA, rq->errors, len, data);
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
@@ -971,6 +998,16 @@ static inline const void *pdu_start(const struct trace_entry *ent)
return te_blk_io_trace(ent) + 1;
}
static inline u32 t_action(const struct trace_entry *ent)
{
return te_blk_io_trace(ent)->action;
}
static inline u32 t_bytes(const struct trace_entry *ent)
{
return te_blk_io_trace(ent)->bytes;
}
static inline u32 t_sec(const struct trace_entry *ent)
{
return te_blk_io_trace(ent)->bytes >> 9;
@@ -996,11 +1033,11 @@ static void get_pdu_remap(const struct trace_entry *ent,
struct blk_io_trace_remap *r)
{
const struct blk_io_trace_remap *__r = pdu_start(ent);
__u64 sector = __r->sector;
__u64 sector_from = __r->sector_from;
r->device = be32_to_cpu(__r->device);
r->device_from = be32_to_cpu(__r->device_from);
r->sector = be64_to_cpu(sector);
r->device_to = be32_to_cpu(__r->device_to);
r->sector_from = be64_to_cpu(sector_from);
}
typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
@@ -1031,36 +1068,98 @@ static int blk_log_action(struct trace_iterator *iter, const char *act)
MAJOR(t->device), MINOR(t->device), act, rwbs);
}
static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent)
{
const unsigned char *pdu_buf;
int pdu_len;
int i, end, ret;
pdu_buf = pdu_start(ent);
pdu_len = te_blk_io_trace(ent)->pdu_len;
if (!pdu_len)
return 1;
/* find the last zero that needs to be printed */
for (end = pdu_len - 1; end >= 0; end--)
if (pdu_buf[end])
break;
end++;
if (!trace_seq_putc(s, '('))
return 0;
for (i = 0; i < pdu_len; i++) {
ret = trace_seq_printf(s, "%s%02x",
i == 0 ? "" : " ", pdu_buf[i]);
if (!ret)
return ret;
/*
* stop when the rest is just zeroes and indicate so
* with a ".." appended
*/
if (i == end && end != pdu_len - 1)
return trace_seq_puts(s, " ..) ");
}
return trace_seq_puts(s, ") ");
}
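A stand-alone sketch of the trailing-zero trimming blk_log_dump_pdu() performs, fed with a made-up 10-byte packet-command PDU (the byte values are invented for illustration):

#include <stdio.h>

/* Mirror the dump logic above: hex bytes, trailing zeroes elided as "..". */
static void dump_pdu(const unsigned char *pdu, int len)
{
	int i, end;

	/* end = one past the last non-zero byte; the loop prints up to it
	 * and elides the remaining zeroes. */
	for (end = len - 1; end >= 0; end--)
		if (pdu[end])
			break;
	end++;

	putchar('(');
	for (i = 0; i < len; i++) {
		printf("%s%02x", i == 0 ? "" : " ", pdu[i]);
		if (i == end && end != len - 1) {
			fputs(" ..", stdout);
			break;
		}
	}
	puts(") ");
}

int main(void)
{
	/* Hypothetical PC command padded with trailing zero bytes. */
	const unsigned char cmd[10] = { 0x28, 0x00, 0x12, 0x34 };

	dump_pdu(cmd, (int)sizeof(cmd));	/* prints "(28 00 12 34 00 ..) " */
	return 0;
}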
static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
{
char cmd[TASK_COMM_LEN];
trace_find_cmdline(ent->pid, cmd);
if (t_sec(ent))
return trace_seq_printf(s, "%llu + %u [%s]\n",
t_sector(ent), t_sec(ent), cmd);
return trace_seq_printf(s, "[%s]\n", cmd);
if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
int ret;
ret = trace_seq_printf(s, "%u ", t_bytes(ent));
if (!ret)
return 0;
ret = blk_log_dump_pdu(s, ent);
if (!ret)
return 0;
return trace_seq_printf(s, "[%s]\n", cmd);
} else {
if (t_sec(ent))
return trace_seq_printf(s, "%llu + %u [%s]\n",
t_sector(ent), t_sec(ent), cmd);
return trace_seq_printf(s, "[%s]\n", cmd);
}
}
static int blk_log_with_error(struct trace_seq *s,
const struct trace_entry *ent)
{
if (t_sec(ent))
return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
t_sec(ent), t_error(ent));
return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
int ret;
ret = blk_log_dump_pdu(s, ent);
if (ret)
return trace_seq_printf(s, "[%d]\n", t_error(ent));
return 0;
} else {
if (t_sec(ent))
return trace_seq_printf(s, "%llu + %u [%d]\n",
t_sector(ent),
t_sec(ent), t_error(ent));
return trace_seq_printf(s, "%llu [%d]\n",
t_sector(ent), t_error(ent));
}
}
static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
{
struct blk_io_trace_remap r = { .device = 0, };
struct blk_io_trace_remap r = { .device_from = 0, };
get_pdu_remap(ent, &r);
return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
t_sector(ent),
t_sec(ent), MAJOR(r.device), MINOR(r.device),
(unsigned long long)r.sector);
t_sector(ent), t_sec(ent),
MAJOR(r.device_from), MINOR(r.device_from),
(unsigned long long)r.sector_from);
}
static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
@@ -1117,7 +1216,6 @@ static void blk_tracer_print_header(struct seq_file *m)
static void blk_tracer_start(struct trace_array *tr)
{
blk_tracer_enabled = true;
trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
}
static int blk_tracer_init(struct trace_array *tr)
@@ -1130,7 +1228,6 @@ static int blk_tracer_init(struct trace_array *tr)
static void blk_tracer_stop(struct trace_array *tr)
{
blk_tracer_enabled = false;
trace_flags |= TRACE_ITER_CONTEXT_INFO;
}
static void blk_tracer_reset(struct trace_array *tr)
@@ -1182,7 +1279,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter,
}
if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
ret = trace_seq_printf(s, "Bad pc action %x\n", what);
ret = trace_seq_printf(s, "Unknown action %x\n", what);
else {
ret = log_action(iter, what2act[what].act[long_act]);
if (ret)
@@ -1195,9 +1292,6 @@ out:
static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
int flags)
{
if (!trace_print_context(iter))
return TRACE_TYPE_PARTIAL_LINE;
return print_one_line(iter, false);
}
@@ -1232,6 +1326,18 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
return print_one_line(iter, true);
}
static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set)
{
/* don't output context-info for blk_classic output */
if (bit == TRACE_BLK_OPT_CLASSIC) {
if (set)
trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
else
trace_flags |= TRACE_ITER_CONTEXT_INFO;
}
return 0;
}
static struct tracer blk_tracer __read_mostly = {
.name = "blk",
.init = blk_tracer_init,
@@ -1241,6 +1347,7 @@ static struct tracer blk_tracer __read_mostly = {
.print_header = blk_tracer_print_header,
.print_line = blk_tracer_print_line,
.flags = &blk_tracer_flags,
.set_flag = blk_tracer_set_flag,
};
static struct trace_event trace_blk_event = {
@@ -1285,7 +1392,8 @@ static int blk_trace_remove_queue(struct request_queue *q)
/*
* Setup everything required to start tracing
*/
static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
static int blk_trace_setup_queue(struct request_queue *q,
struct block_device *bdev)
{
struct blk_trace *old_bt, *bt = NULL;
int ret = -ENOMEM;
@@ -1298,9 +1406,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
if (!bt->msg_data)
goto free_bt;
bt->dev = dev;
bt->dev = bdev->bd_dev;
bt->act_mask = (u16)-1;
bt->end_lba = -1ULL;
blk_trace_setup_lba(bt, bdev);
old_bt = xchg(&q->blk_trace, bt);
if (old_bt != NULL) {
@@ -1517,7 +1626,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
if (attr == &dev_attr_enable) {
if (value)
ret = blk_trace_setup_queue(q, bdev->bd_dev);
ret = blk_trace_setup_queue(q, bdev);
else
ret = blk_trace_remove_queue(q);
goto out_unlock_bdev;
@@ -1525,7 +1634,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
ret = 0;
if (q->blk_trace == NULL)
ret = blk_trace_setup_queue(q, bdev->bd_dev);
ret = blk_trace_setup_queue(q, bdev);
if (ret == 0) {
if (attr == &dev_attr_act_mask)
@@ -1548,3 +1657,77 @@ out:
return ret ? ret : count;
}
int blk_trace_init_sysfs(struct device *dev)
{
return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
}
#endif /* CONFIG_BLK_DEV_IO_TRACE */
#ifdef CONFIG_EVENT_TRACING
void blk_dump_cmd(char *buf, struct request *rq)
{
int i, end;
int len = rq->cmd_len;
unsigned char *cmd = rq->cmd;
if (!blk_pc_request(rq)) {
buf[0] = '\0';
return;
}
for (end = len - 1; end >= 0; end--)
if (cmd[end])
break;
end++;
for (i = 0; i < len; i++) {
buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
if (i == end && end != len - 1) {
sprintf(buf, " ..");
break;
}
}
}
void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
{
int i = 0;
if (rw & WRITE)
rwbs[i++] = 'W';
else if (rw & 1 << BIO_RW_DISCARD)
rwbs[i++] = 'D';
else if (bytes)
rwbs[i++] = 'R';
else
rwbs[i++] = 'N';
if (rw & 1 << BIO_RW_AHEAD)
rwbs[i++] = 'A';
if (rw & 1 << BIO_RW_BARRIER)
rwbs[i++] = 'B';
if (rw & 1 << BIO_RW_SYNCIO)
rwbs[i++] = 'S';
if (rw & 1 << BIO_RW_META)
rwbs[i++] = 'M';
rwbs[i] = '\0';
}
void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
{
int rw = rq->cmd_flags & 0x03;
int bytes;
if (blk_discard_rq(rq))
rw |= (1 << BIO_RW_DISCARD);
bytes = blk_rq_bytes(rq);
blk_fill_rwbs(rwbs, rw, bytes);
}
#endif /* CONFIG_EVENT_TRACING */


@@ -1,14 +0,0 @@
/*
* This is the place to register all trace points as events.
*/
#include <linux/stringify.h>
#include <trace/trace_events.h>
#include "trace_output.h"
#include "trace_events_stage_1.h"
#include "trace_events_stage_2.h"
#include "trace_events_stage_3.h"


@@ -29,11 +29,13 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <trace/sched.h>
#include <trace/events/sched.h>
#include <asm/ftrace.h>
#include <asm/setup.h>
#include "trace.h"
#include "trace_output.h"
#include "trace_stat.h"
#define FTRACE_WARN_ON(cond) \
do { \
@@ -68,7 +70,7 @@ static DEFINE_MUTEX(ftrace_lock);
static struct ftrace_ops ftrace_list_end __read_mostly =
{
.func = ftrace_stub,
.func = ftrace_stub,
};
static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
@@ -240,6 +242,580 @@ static void ftrace_update_pid_func(void)
#endif
}
#ifdef CONFIG_FUNCTION_PROFILER
struct ftrace_profile {
struct hlist_node node;
unsigned long ip;
unsigned long counter;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
unsigned long long time;
#endif
};
struct ftrace_profile_page {
struct ftrace_profile_page *next;
unsigned long index;
struct ftrace_profile records[];
};
struct ftrace_profile_stat {
atomic_t disabled;
struct hlist_head *hash;
struct ftrace_profile_page *pages;
struct ftrace_profile_page *start;
struct tracer_stat stat;
};
#define PROFILE_RECORDS_SIZE \
(PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
#define PROFILES_PER_PAGE \
(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
static int ftrace_profile_bits __read_mostly;
static int ftrace_profile_enabled __read_mostly;
/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
static DEFINE_MUTEX(ftrace_profile_lock);
static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
static void *
function_stat_next(void *v, int idx)
{
struct ftrace_profile *rec = v;
struct ftrace_profile_page *pg;
pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
again:
rec++;
if ((void *)rec >= (void *)&pg->records[pg->index]) {
pg = pg->next;
if (!pg)
return NULL;
rec = &pg->records[0];
if (!rec->counter)
goto again;
}
return rec;
}
static void *function_stat_start(struct tracer_stat *trace)
{
struct ftrace_profile_stat *stat =
container_of(trace, struct ftrace_profile_stat, stat);
if (!stat || !stat->start)
return NULL;
return function_stat_next(&stat->start->records[0], 0);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* function graph compares on total time */
static int function_stat_cmp(void *p1, void *p2)
{
struct ftrace_profile *a = p1;
struct ftrace_profile *b = p2;
if (a->time < b->time)
return -1;
if (a->time > b->time)
return 1;
else
return 0;
}
#else
/* not function graph compares against hits */
static int function_stat_cmp(void *p1, void *p2)
{
struct ftrace_profile *a = p1;
struct ftrace_profile *b = p2;
if (a->counter < b->counter)
return -1;
if (a->counter > b->counter)
return 1;
else
return 0;
}
#endif
static int function_stat_headers(struct seq_file *m)
{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
seq_printf(m, " Function "
"Hit Time Avg\n"
" -------- "
"--- ---- ---\n");
#else
seq_printf(m, " Function Hit\n"
" -------- ---\n");
#endif
return 0;
}
static int function_stat_show(struct seq_file *m, void *v)
{
struct ftrace_profile *rec = v;
char str[KSYM_SYMBOL_LEN];
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static DEFINE_MUTEX(mutex);
static struct trace_seq s;
unsigned long long avg;
#endif
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
seq_printf(m, " %-30.30s %10lu", str, rec->counter);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
seq_printf(m, " ");
avg = rec->time;
do_div(avg, rec->counter);
mutex_lock(&mutex);
trace_seq_init(&s);
trace_print_graph_duration(rec->time, &s);
trace_seq_puts(&s, " ");
trace_print_graph_duration(avg, &s);
trace_print_seq(m, &s);
mutex_unlock(&mutex);
#endif
seq_putc(m, '\n');
return 0;
}
static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
{
struct ftrace_profile_page *pg;
pg = stat->pages = stat->start;
while (pg) {
memset(pg->records, 0, PROFILE_RECORDS_SIZE);
pg->index = 0;
pg = pg->next;
}
memset(stat->hash, 0,
FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
}
int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
{
struct ftrace_profile_page *pg;
int functions;
int pages;
int i;
/* If we already allocated, do nothing */
if (stat->pages)
return 0;
stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
if (!stat->pages)
return -ENOMEM;
#ifdef CONFIG_DYNAMIC_FTRACE
functions = ftrace_update_tot_cnt;
#else
/*
* We do not know the number of functions that exist because
* dynamic tracing is what counts them. From past experience
* we have around 20K functions. That should be more than enough.
* It is highly unlikely we will execute every function in
* the kernel.
*/
functions = 20000;
#endif
pg = stat->start = stat->pages;
pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
for (i = 0; i < pages; i++) {
pg->next = (void *)get_zeroed_page(GFP_KERNEL);
if (!pg->next)
goto out_free;
pg = pg->next;
}
return 0;
out_free:
pg = stat->start;
while (pg) {
unsigned long tmp = (unsigned long)pg;
pg = pg->next;
free_page(tmp);
}
free_page((unsigned long)stat->pages);
stat->pages = NULL;
stat->start = NULL;
return -ENOMEM;
}
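The 20K-function estimate and the PROFILES_PER_PAGE arithmetic above translate into a fairly modest amount of memory. A hedged back-of-the-envelope sketch; the 4 KiB page size and the field sizes of the mirrored structures are assumptions (the real figures depend on the architecture and on CONFIG_FUNCTION_GRAPH_TRACER):

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096	/* assumed */

/* Shapes that mirror ftrace_profile / ftrace_profile_page above. */
struct profile_rec {
	void *node_next;		/* stand-in for the hlist_node     */
	void *node_pprev;
	unsigned long ip;
	unsigned long counter;
	unsigned long long time;	/* only with FUNCTION_GRAPH_TRACER */
};

struct profile_page {
	struct profile_page *next;
	unsigned long index;
	struct profile_rec records[];
};

int main(void)
{
	size_t per_page = (PAGE_SIZE - offsetof(struct profile_page, records))
			  / sizeof(struct profile_rec);
	int functions = 20000;		/* the estimate used above */
	int pages = (int)((functions + per_page - 1) / per_page); /* DIV_ROUND_UP */

	printf("records per page : %zu\n", per_page);
	printf("pages for %d fns: %d (~%zu KiB)\n",
	       functions, pages, (size_t)pages * PAGE_SIZE / 1024);
	return 0;
}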
static int ftrace_profile_init_cpu(int cpu)
{
struct ftrace_profile_stat *stat;
int size;
stat = &per_cpu(ftrace_profile_stats, cpu);
if (stat->hash) {
/* If the profile is already created, simply reset it */
ftrace_profile_reset(stat);
return 0;
}
/*
* We are profiling all functions, but usually only a few thousand
* functions are hit. We'll make a hash of 1024 items.
*/
size = FTRACE_PROFILE_HASH_SIZE;
stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
if (!stat->hash)
return -ENOMEM;
if (!ftrace_profile_bits) {
size--;
for (; size; size >>= 1)
ftrace_profile_bits++;
}
/* Preallocate the function profiling pages */
if (ftrace_profile_pages_init(stat) < 0) {
kfree(stat->hash);
stat->hash = NULL;
return -ENOMEM;
}
return 0;
}
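The loop above that derives ftrace_profile_bits is just computing log2 of the hash size; a tiny stand-alone check of the same arithmetic:

#include <stdio.h>

int main(void)
{
	int size = 1024;	/* FTRACE_PROFILE_HASH_SIZE */
	int bits = 0;

	/* Same decrement-and-shift loop as ftrace_profile_init_cpu(). */
	size--;
	for (; size; size >>= 1)
		bits++;

	printf("hash bits = %d\n", bits);	/* 10 bits for 1024 buckets */
	return 0;
}

hash_long(ip, 10) then spreads the instruction pointers over the 1024 buckets.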
static int ftrace_profile_init(void)
{
int cpu;
int ret = 0;
for_each_online_cpu(cpu) {
ret = ftrace_profile_init_cpu(cpu);
if (ret)
break;
}
return ret;
}
/* interrupts must be disabled */
static struct ftrace_profile *
ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
{
struct ftrace_profile *rec;
struct hlist_head *hhd;
struct hlist_node *n;
unsigned long key;
key = hash_long(ip, ftrace_profile_bits);
hhd = &stat->hash[key];
if (hlist_empty(hhd))
return NULL;
hlist_for_each_entry_rcu(rec, n, hhd, node) {
if (rec->ip == ip)
return rec;
}
return NULL;
}
static void ftrace_add_profile(struct ftrace_profile_stat *stat,
struct ftrace_profile *rec)
{
unsigned long key;
key = hash_long(rec->ip, ftrace_profile_bits);
hlist_add_head_rcu(&rec->node, &stat->hash[key]);
}
/*
* The memory is already allocated; this simply finds a new record to use.
*/
static struct ftrace_profile *
ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
{
struct ftrace_profile *rec = NULL;
/* prevent recursion (from NMIs) */
if (atomic_inc_return(&stat->disabled) != 1)
goto out;
/*
* Try to find the function again since an NMI
* could have added it
*/
rec = ftrace_find_profiled_func(stat, ip);
if (rec)
goto out;
if (stat->pages->index == PROFILES_PER_PAGE) {
if (!stat->pages->next)
goto out;
stat->pages = stat->pages->next;
}
rec = &stat->pages->records[stat->pages->index++];
rec->ip = ip;
ftrace_add_profile(stat, rec);
out:
atomic_dec(&stat->disabled);
return rec;
}
static void
function_profile_call(unsigned long ip, unsigned long parent_ip)
{
struct ftrace_profile_stat *stat;
struct ftrace_profile *rec;
unsigned long flags;
if (!ftrace_profile_enabled)
return;
local_irq_save(flags);
stat = &__get_cpu_var(ftrace_profile_stats);
if (!stat->hash || !ftrace_profile_enabled)
goto out;
rec = ftrace_find_profiled_func(stat, ip);
if (!rec) {
rec = ftrace_profile_alloc(stat, ip);
if (!rec)
goto out;
}
rec->counter++;
out:
local_irq_restore(flags);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int profile_graph_entry(struct ftrace_graph_ent *trace)
{
function_profile_call(trace->func, 0);
return 1;
}
static void profile_graph_return(struct ftrace_graph_ret *trace)
{
struct ftrace_profile_stat *stat;
unsigned long long calltime;
struct ftrace_profile *rec;
unsigned long flags;
local_irq_save(flags);
stat = &__get_cpu_var(ftrace_profile_stats);
if (!stat->hash || !ftrace_profile_enabled)
goto out;
calltime = trace->rettime - trace->calltime;
if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
int index;
index = trace->depth;
/* Append this call time to the parent time to subtract */
if (index)
current->ret_stack[index - 1].subtime += calltime;
if (current->ret_stack[index].subtime < calltime)
calltime -= current->ret_stack[index].subtime;
else
calltime = 0;
}
rec = ftrace_find_profiled_func(stat, trace->func);
if (rec)
rec->time += calltime;
out:
local_irq_restore(flags);
}
static int register_ftrace_profiler(void)
{
return register_ftrace_graph(&profile_graph_return,
&profile_graph_entry);
}
static void unregister_ftrace_profiler(void)
{
unregister_ftrace_graph();
}
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly =
{
.func = function_profile_call,
};
static int register_ftrace_profiler(void)
{
return register_ftrace_function(&ftrace_profile_ops);
}
static void unregister_ftrace_profiler(void)
{
unregister_ftrace_function(&ftrace_profile_ops);
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
static ssize_t
ftrace_profile_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
unsigned long val;
char buf[64]; /* big enough to hold a number */
int ret;
if (cnt >= sizeof(buf))
return -EINVAL;
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
buf[cnt] = 0;
ret = strict_strtoul(buf, 10, &val);
if (ret < 0)
return ret;
val = !!val;
mutex_lock(&ftrace_profile_lock);
if (ftrace_profile_enabled ^ val) {
if (val) {
ret = ftrace_profile_init();
if (ret < 0) {
cnt = ret;
goto out;
}
ret = register_ftrace_profiler();
if (ret < 0) {
cnt = ret;
goto out;
}
ftrace_profile_enabled = 1;
} else {
ftrace_profile_enabled = 0;
/*
* unregister_ftrace_profiler calls stop_machine
* so this acts like a synchronize_sched.
*/
unregister_ftrace_profiler();
}
}
out:
mutex_unlock(&ftrace_profile_lock);
filp->f_pos += cnt;
return cnt;
}
static ssize_t
ftrace_profile_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64]; /* big enough to hold a number */
int r;
r = sprintf(buf, "%u\n", ftrace_profile_enabled);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
static const struct file_operations ftrace_profile_fops = {
.open = tracing_open_generic,
.read = ftrace_profile_read,
.write = ftrace_profile_write,
};
/* used to initialize the real stat files */
static struct tracer_stat function_stats __initdata = {
.name = "functions",
.stat_start = function_stat_start,
.stat_next = function_stat_next,
.stat_cmp = function_stat_cmp,
.stat_headers = function_stat_headers,
.stat_show = function_stat_show
};
static void ftrace_profile_debugfs(struct dentry *d_tracer)
{
struct ftrace_profile_stat *stat;
struct dentry *entry;
char *name;
int ret;
int cpu;
for_each_possible_cpu(cpu) {
stat = &per_cpu(ftrace_profile_stats, cpu);
/* allocate enough for function name + cpu number */
name = kmalloc(32, GFP_KERNEL);
if (!name) {
/*
* The files created are permanent; if something goes wrong here,
* we still do not free the memory.
*/
kfree(stat);
WARN(1,
"Could not allocate stat file for cpu %d\n",
cpu);
return;
}
stat->stat = function_stats;
snprintf(name, 32, "function%d", cpu);
stat->stat.name = name;
ret = register_stat_tracer(&stat->stat);
if (ret) {
WARN(1,
"Could not register function stat for cpu %d\n",
cpu);
kfree(name);
return;
}
}
entry = debugfs_create_file("function_profile_enabled", 0644,
d_tracer, NULL, &ftrace_profile_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'function_profile_enabled' entry\n");
}
#else /* CONFIG_FUNCTION_PROFILER */
static void ftrace_profile_debugfs(struct dentry *d_tracer)
{
}
#endif /* CONFIG_FUNCTION_PROFILER */
/* set when tracing only a pid */
struct pid *ftrace_pid_trace;
static struct pid * const ftrace_swapper_pid = &init_struct_pid;
@@ -261,7 +837,6 @@ struct ftrace_func_probe {
struct rcu_head rcu;
};
enum {
FTRACE_ENABLE_CALLS = (1 << 0),
FTRACE_DISABLE_CALLS = (1 << 1),
@@ -346,30 +921,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec)
rec->flags |= FTRACE_FL_FREE;
}
void ftrace_release(void *start, unsigned long size)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
unsigned long s = (unsigned long)start;
unsigned long e = s + size;
if (ftrace_disabled || !start)
return;
mutex_lock(&ftrace_lock);
do_for_each_ftrace_rec(pg, rec) {
if ((rec->ip >= s) && (rec->ip < e)) {
/*
* rec->ip is changed in ftrace_free_rec()
* It should not be between s and e if the record was freed.
*/
FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
ftrace_free_rec(rec);
}
} while_for_each_ftrace_rec();
mutex_unlock(&ftrace_lock);
}
static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
{
struct dyn_ftrace *rec;
@@ -1408,7 +1959,7 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
static struct ftrace_ops trace_probe_ops __read_mostly =
{
.func = function_trace_probe_call,
.func = function_trace_probe_call,
};
static int ftrace_probe_registered;
@@ -1823,6 +2374,45 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset)
ftrace_set_regex(buf, len, reset, 0);
}
/*
* command line interface to allow users to set filters on boot up.
*/
#define FTRACE_FILTER_SIZE COMMAND_LINE_SIZE
static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
static int __init set_ftrace_notrace(char *str)
{
strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
return 1;
}
__setup("ftrace_notrace=", set_ftrace_notrace);
static int __init set_ftrace_filter(char *str)
{
strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
return 1;
}
__setup("ftrace_filter=", set_ftrace_filter);
static void __init set_ftrace_early_filter(char *buf, int enable)
{
char *func;
while (buf) {
func = strsep(&buf, ",");
ftrace_set_regex(func, strlen(func), 0, enable);
}
}
static void __init set_ftrace_early_filters(void)
{
if (ftrace_filter_buf[0])
set_ftrace_early_filter(ftrace_filter_buf, 1);
if (ftrace_notrace_buf[0])
set_ftrace_early_filter(ftrace_notrace_buf, 0);
}
static int
ftrace_regex_release(struct inode *inode, struct file *file, int enable)
{
@@ -2128,38 +2718,23 @@ static const struct file_operations ftrace_graph_fops = {
static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
{
struct dentry *entry;
entry = debugfs_create_file("available_filter_functions", 0444,
d_tracer, NULL, &ftrace_avail_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'available_filter_functions' entry\n");
trace_create_file("available_filter_functions", 0444,
d_tracer, NULL, &ftrace_avail_fops);
entry = debugfs_create_file("failures", 0444,
d_tracer, NULL, &ftrace_failures_fops);
if (!entry)
pr_warning("Could not create debugfs 'failures' entry\n");
trace_create_file("failures", 0444,
d_tracer, NULL, &ftrace_failures_fops);
entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
NULL, &ftrace_filter_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'set_ftrace_filter' entry\n");
trace_create_file("set_ftrace_filter", 0644, d_tracer,
NULL, &ftrace_filter_fops);
entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
trace_create_file("set_ftrace_notrace", 0644, d_tracer,
NULL, &ftrace_notrace_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'set_ftrace_notrace' entry\n");
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
entry = debugfs_create_file("set_graph_function", 0444, d_tracer,
trace_create_file("set_graph_function", 0444, d_tracer,
NULL,
&ftrace_graph_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'set_graph_function' entry\n");
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
return 0;
@@ -2197,14 +2772,72 @@ static int ftrace_convert_nops(struct module *mod,
return 0;
}
void ftrace_init_module(struct module *mod,
unsigned long *start, unsigned long *end)
#ifdef CONFIG_MODULES
void ftrace_release(void *start, void *end)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
unsigned long s = (unsigned long)start;
unsigned long e = (unsigned long)end;
if (ftrace_disabled || !start || start == end)
return;
mutex_lock(&ftrace_lock);
do_for_each_ftrace_rec(pg, rec) {
if ((rec->ip >= s) && (rec->ip < e)) {
/*
* rec->ip is changed in ftrace_free_rec()
* It should not be between s and e if the record was freed.
*/
FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
ftrace_free_rec(rec);
}
} while_for_each_ftrace_rec();
mutex_unlock(&ftrace_lock);
}
static void ftrace_init_module(struct module *mod,
unsigned long *start, unsigned long *end)
{
if (ftrace_disabled || start == end)
return;
ftrace_convert_nops(mod, start, end);
}
static int ftrace_module_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct module *mod = data;
switch (val) {
case MODULE_STATE_COMING:
ftrace_init_module(mod, mod->ftrace_callsites,
mod->ftrace_callsites +
mod->num_ftrace_callsites);
break;
case MODULE_STATE_GOING:
ftrace_release(mod->ftrace_callsites,
mod->ftrace_callsites +
mod->num_ftrace_callsites);
break;
}
return 0;
}
#else
static int ftrace_module_notify(struct notifier_block *self,
unsigned long val, void *data)
{
return 0;
}
#endif /* CONFIG_MODULES */
struct notifier_block ftrace_module_nb = {
.notifier_call = ftrace_module_notify,
.priority = 0,
};
extern unsigned long __start_mcount_loc[];
extern unsigned long __stop_mcount_loc[];
@@ -2236,6 +2869,12 @@ void __init ftrace_init(void)
__start_mcount_loc,
__stop_mcount_loc);
ret = register_module_notifier(&ftrace_module_nb);
if (ret)
pr_warning("Failed to register trace ftrace module notifier\n");
set_ftrace_early_filters();
return;
failed:
ftrace_disabled = 1;
@@ -2417,7 +3056,6 @@ static const struct file_operations ftrace_pid_fops = {
static __init int ftrace_init_debugfs(void)
{
struct dentry *d_tracer;
struct dentry *entry;
d_tracer = tracing_init_dentry();
if (!d_tracer)
@@ -2425,11 +3063,11 @@ static __init int ftrace_init_debugfs(void)
ftrace_init_dyn_debugfs(d_tracer);
entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer,
NULL, &ftrace_pid_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'set_ftrace_pid' entry\n");
trace_create_file("set_ftrace_pid", 0644, d_tracer,
NULL, &ftrace_pid_fops);
ftrace_profile_debugfs(d_tracer);
return 0;
}
fs_initcall(ftrace_init_debugfs);
@@ -2538,7 +3176,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static atomic_t ftrace_graph_active;
static int ftrace_graph_active;
static struct notifier_block ftrace_suspend_notifier;
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
@@ -2580,12 +3218,12 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
}
if (t->ret_stack == NULL) {
t->curr_ret_stack = -1;
/* Make sure IRQs see the -1 first: */
barrier();
t->ret_stack = ret_stack_list[start++];
atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->curr_ret_stack = -1;
/* Make sure the tasks see the -1 first: */
smp_wmb();
t->ret_stack = ret_stack_list[start++];
}
} while_each_thread(g, t);
@@ -2643,8 +3281,10 @@ static int start_graph_tracing(void)
return -ENOMEM;
/* The cpu_boot init_task->ret_stack will never be freed */
for_each_online_cpu(cpu)
ftrace_graph_init_task(idle_task(cpu));
for_each_online_cpu(cpu) {
if (!idle_task(cpu)->ret_stack)
ftrace_graph_init_task(idle_task(cpu));
}
do {
ret = alloc_retstack_tasklist(ret_stack_list);
@@ -2690,7 +3330,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
mutex_lock(&ftrace_lock);
/* we currently allow only one tracer registered at a time */
if (atomic_read(&ftrace_graph_active)) {
if (ftrace_graph_active) {
ret = -EBUSY;
goto out;
}
@@ -2698,10 +3338,10 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
register_pm_notifier(&ftrace_suspend_notifier);
atomic_inc(&ftrace_graph_active);
ftrace_graph_active++;
ret = start_graph_tracing();
if (ret) {
atomic_dec(&ftrace_graph_active);
ftrace_graph_active--;
goto out;
}
@@ -2719,10 +3359,10 @@ void unregister_ftrace_graph(void)
{
mutex_lock(&ftrace_lock);
if (!unlikely(atomic_read(&ftrace_graph_active)))
if (unlikely(!ftrace_graph_active))
goto out;
atomic_dec(&ftrace_graph_active);
ftrace_graph_active--;
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -2736,18 +3376,25 @@ void unregister_ftrace_graph(void)
/* Allocate a return stack for newly created task */
void ftrace_graph_init_task(struct task_struct *t)
{
if (atomic_read(&ftrace_graph_active)) {
t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
/* Make sure we do not use the parent ret_stack */
t->ret_stack = NULL;
if (ftrace_graph_active) {
struct ftrace_ret_stack *ret_stack;
ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
* sizeof(struct ftrace_ret_stack),
GFP_KERNEL);
if (!t->ret_stack)
if (!ret_stack)
return;
t->curr_ret_stack = -1;
atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->ftrace_timestamp = 0;
} else
t->ret_stack = NULL;
/* make curr_ret_stack visible before we add the ret_stack */
smp_wmb();
t->ret_stack = ret_stack;
}
}
void ftrace_graph_exit_task(struct task_struct *t)

View File

@@ -12,7 +12,7 @@
#include <linux/dcache.h>
#include <linux/fs.h>
#include <trace/kmemtrace.h>
#include <linux/kmemtrace.h>
#include "trace_output.h"
#include "trace.h"
@@ -42,6 +42,7 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
gfp_t gfp_flags,
int node)
{
struct ftrace_event_call *call = &event_kmem_alloc;
struct trace_array *tr = kmemtrace_array;
struct kmemtrace_alloc_entry *entry;
struct ring_buffer_event *event;
@@ -62,7 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
entry->gfp_flags = gfp_flags;
entry->node = node;
ring_buffer_unlock_commit(tr->buffer, event);
if (!filter_check_discard(call, entry, tr->buffer, event))
ring_buffer_unlock_commit(tr->buffer, event);
trace_wake_up();
}
@@ -71,6 +73,7 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
unsigned long call_site,
const void *ptr)
{
struct ftrace_event_call *call = &event_kmem_free;
struct trace_array *tr = kmemtrace_array;
struct kmemtrace_free_entry *entry;
struct ring_buffer_event *event;
@@ -86,7 +89,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
entry->call_site = call_site;
entry->ptr = ptr;
ring_buffer_unlock_commit(tr->buffer, event);
if (!filter_check_discard(call, entry, tr->buffer, event))
ring_buffer_unlock_commit(tr->buffer, event);
trace_wake_up();
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,416 @@
/*
* ring buffer tester and benchmark
*
* Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
*/
#include <linux/ring_buffer.h>
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/time.h>
struct rb_page {
u64 ts;
local_t commit;
char data[4080];
};
/* run time and sleep time in seconds */
#define RUN_TIME 10
#define SLEEP_TIME 10
/* number of events for writer to wake up the reader */
static int wakeup_interval = 100;
static int reader_finish;
static struct completion read_start;
static struct completion read_done;
static struct ring_buffer *buffer;
static struct task_struct *producer;
static struct task_struct *consumer;
static unsigned long read;
static int disable_reader;
module_param(disable_reader, uint, 0644);
MODULE_PARM_DESC(disable_reader, "only run producer");
static int read_events;
static int kill_test;
#define KILL_TEST() \
do { \
if (!kill_test) { \
kill_test = 1; \
WARN_ON(1); \
} \
} while (0)
enum event_status {
EVENT_FOUND,
EVENT_DROPPED,
};
static enum event_status read_event(int cpu)
{
struct ring_buffer_event *event;
int *entry;
u64 ts;
event = ring_buffer_consume(buffer, cpu, &ts);
if (!event)
return EVENT_DROPPED;
entry = ring_buffer_event_data(event);
if (*entry != cpu) {
KILL_TEST();
return EVENT_DROPPED;
}
read++;
return EVENT_FOUND;
}
static enum event_status read_page(int cpu)
{
struct ring_buffer_event *event;
struct rb_page *rpage;
unsigned long commit;
void *bpage;
int *entry;
int ret;
int inc;
int i;
bpage = ring_buffer_alloc_read_page(buffer);
if (!bpage)
return EVENT_DROPPED;
ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
if (ret >= 0) {
rpage = bpage;
commit = local_read(&rpage->commit);
for (i = 0; i < commit && !kill_test; i += inc) {
if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) {
KILL_TEST();
break;
}
inc = -1;
event = (void *)&rpage->data[i];
switch (event->type_len) {
case RINGBUF_TYPE_PADDING:
/* We don't expect any padding */
KILL_TEST();
break;
case RINGBUF_TYPE_TIME_EXTEND:
inc = 8;
break;
case 0:
entry = ring_buffer_event_data(event);
if (*entry != cpu) {
KILL_TEST();
break;
}
read++;
if (!event->array[0]) {
KILL_TEST();
break;
}
inc = event->array[0];
break;
default:
entry = ring_buffer_event_data(event);
if (*entry != cpu) {
KILL_TEST();
break;
}
read++;
inc = ((event->type_len + 1) * 4);
}
if (kill_test)
break;
if (inc <= 0) {
KILL_TEST();
break;
}
}
}
ring_buffer_free_read_page(buffer, bpage);
if (ret < 0)
return EVENT_DROPPED;
return EVENT_FOUND;
}
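The per-event advance used in the loop above can be read off on its own. The helper below is only an illustrative sketch (not part of the patch) that mirrors the switch in read_page(), returning how many bytes to step to reach the next event on the page:

/*
 * Sketch only: restates the increment logic from read_page() above.
 * A return value of 0 corresponds to the KILL_TEST() cases.
 */
static int rb_benchmark_event_inc(struct ring_buffer_event *event)
{
	switch (event->type_len) {
	case RINGBUF_TYPE_PADDING:
		/* the benchmark does not expect padding events */
		return 0;
	case RINGBUF_TYPE_TIME_EXTEND:
		/* time extend events occupy 8 bytes */
		return 8;
	case 0:
		/* zero type_len: the loop above advances by array[0] */
		return event->array[0];
	default:
		/* data event: (type_len + 1) * 4 bytes, as in the loop above */
		return (event->type_len + 1) * 4;
	}
}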
static void ring_buffer_consumer(void)
{
/* toggle between reading pages and events */
read_events ^= 1;
read = 0;
while (!reader_finish && !kill_test) {
int found;
do {
int cpu;
found = 0;
for_each_online_cpu(cpu) {
enum event_status stat;
if (read_events)
stat = read_event(cpu);
else
stat = read_page(cpu);
if (kill_test)
break;
if (stat == EVENT_FOUND)
found = 1;
}
} while (found && !kill_test);
set_current_state(TASK_INTERRUPTIBLE);
if (reader_finish)
break;
schedule();
__set_current_state(TASK_RUNNING);
}
reader_finish = 0;
complete(&read_done);
}
static void ring_buffer_producer(void)
{
struct timeval start_tv;
struct timeval end_tv;
unsigned long long time;
unsigned long long entries;
unsigned long long overruns;
unsigned long missed = 0;
unsigned long hit = 0;
unsigned long avg;
int cnt = 0;
/*
* Hammer the buffer for 10 secs (this may
* make the system stall)
*/
pr_info("Starting ring buffer hammer\n");
do_gettimeofday(&start_tv);
do {
struct ring_buffer_event *event;
int *entry;
event = ring_buffer_lock_reserve(buffer, 10);
if (!event) {
missed++;
} else {
hit++;
entry = ring_buffer_event_data(event);
*entry = smp_processor_id();
ring_buffer_unlock_commit(buffer, event);
}
do_gettimeofday(&end_tv);
cnt++;
if (consumer && !(cnt % wakeup_interval))
wake_up_process(consumer);
#ifndef CONFIG_PREEMPT
/*
* If we are a non-preempt kernel, the 10 second run will
* stop everything while it runs. Instead, we will call
* cond_resched and also add any time that was lost by a
* reschedule.
*
* Do a cond resched at the same frequency we would wake up
* the reader.
*/
if (cnt % wakeup_interval)
cond_resched();
#endif
} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
pr_info("End ring buffer hammer\n");
if (consumer) {
/* Init both completions here to avoid races */
init_completion(&read_start);
init_completion(&read_done);
/* the completions must be visible before the finish var */
smp_wmb();
reader_finish = 1;
/* finish var visible before waking up the consumer */
smp_wmb();
wake_up_process(consumer);
wait_for_completion(&read_done);
}
time = end_tv.tv_sec - start_tv.tv_sec;
time *= USEC_PER_SEC;
time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec);
entries = ring_buffer_entries(buffer);
overruns = ring_buffer_overruns(buffer);
if (kill_test)
pr_info("ERROR!\n");
pr_info("Time: %lld (usecs)\n", time);
pr_info("Overruns: %lld\n", overruns);
if (disable_reader)
pr_info("Read: (reader disabled)\n");
else
pr_info("Read: %ld (by %s)\n", read,
read_events ? "events" : "pages");
pr_info("Entries: %lld\n", entries);
pr_info("Total: %lld\n", entries + overruns + read);
pr_info("Missed: %ld\n", missed);
pr_info("Hit: %ld\n", hit);
/* Convert time from usecs to millisecs */
do_div(time, USEC_PER_MSEC);
if (time)
hit /= (long)time;
else
pr_info("TIME IS ZERO??\n");
pr_info("Entries per millisec: %ld\n", hit);
if (hit) {
/* Calculate the average time in nanosecs */
avg = NSEC_PER_MSEC / hit;
pr_info("%ld ns per entry\n", avg);
}
if (missed) {
if (time)
missed /= (long)time;
pr_info("Total iterations per millisec: %ld\n", hit + missed);
/* it is possible that hit + missed will overflow and be zero */
if (!(hit + missed)) {
pr_info("hit + missed overflowed and totalled zero!\n");
hit--; /* make it non zero */
}
/* Calculate the average time in nanosecs */
avg = NSEC_PER_MSEC / (hit + missed);
pr_info("%ld ns per entry\n", avg);
}
}
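To make the reporting arithmetic above concrete with hypothetical numbers: if the writer achieved hit = 10,000,000 commits during the 10 second run, then time starts out as roughly 10,000,000 usecs and do_div(time, USEC_PER_MSEC) turns it into 10,000 msecs; hit /= time then yields 1,000 entries per millisecond, and avg = NSEC_PER_MSEC / hit = 1,000,000 / 1,000 = 1,000 ns per entry. When there were failed reservations, the same division over hit + missed estimates the cost of a reservation attempt regardless of whether it succeeded.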
static void wait_to_die(void)
{
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
schedule();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
}
static int ring_buffer_consumer_thread(void *arg)
{
while (!kthread_should_stop() && !kill_test) {
complete(&read_start);
ring_buffer_consumer();
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop() || kill_test)
break;
schedule();
__set_current_state(TASK_RUNNING);
}
__set_current_state(TASK_RUNNING);
if (kill_test)
wait_to_die();
return 0;
}
static int ring_buffer_producer_thread(void *arg)
{
init_completion(&read_start);
while (!kthread_should_stop() && !kill_test) {
ring_buffer_reset(buffer);
if (consumer) {
smp_wmb();
wake_up_process(consumer);
wait_for_completion(&read_start);
}
ring_buffer_producer();
pr_info("Sleeping for 10 secs\n");
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ * SLEEP_TIME);
__set_current_state(TASK_RUNNING);
}
if (kill_test)
wait_to_die();
return 0;
}
static int __init ring_buffer_benchmark_init(void)
{
int ret;
/* make a one meg buffer in overwrite mode */
buffer = ring_buffer_alloc(1000000, RB_FL_OVERWRITE);
if (!buffer)
return -ENOMEM;
if (!disable_reader) {
consumer = kthread_create(ring_buffer_consumer_thread,
NULL, "rb_consumer");
ret = PTR_ERR(consumer);
if (IS_ERR(consumer))
goto out_fail;
}
producer = kthread_run(ring_buffer_producer_thread,
NULL, "rb_producer");
ret = PTR_ERR(producer);
if (IS_ERR(producer))
goto out_kill;
return 0;
out_kill:
if (consumer)
kthread_stop(consumer);
out_fail:
ring_buffer_free(buffer);
return ret;
}
static void __exit ring_buffer_benchmark_exit(void)
{
kthread_stop(producer);
if (consumer)
kthread_stop(consumer);
ring_buffer_free(buffer);
}
module_init(ring_buffer_benchmark_init);
module_exit(ring_buffer_benchmark_exit);
MODULE_AUTHOR("Steven Rostedt");
MODULE_DESCRIPTION("ring_buffer_benchmark");
MODULE_LICENSE("GPL");
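For a quick usage sketch (parameter name as defined above; the exact module file name is assumed from context): loading the module, e.g. with modprobe ring_buffer_benchmark disable_reader=1, starts the producer kthread (and, unless disabled, the consumer), the results are printed to the kernel log via pr_info(), and unloading the module stops the threads through ring_buffer_benchmark_exit().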

View File

@@ -171,6 +171,13 @@ static struct trace_array global_trace;
static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
struct ring_buffer_event *event)
{
return filter_check_discard(call, rec, global_trace.buffer, event);
}
EXPORT_SYMBOL_GPL(filter_current_check_discard);
cycle_t ftrace_now(int cpu)
{
u64 ts;
@@ -255,7 +262,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME;
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
TRACE_ITER_GRAPH_TIME;
/**
* trace_wake_up - wake up tasks waiting for trace input
@@ -317,6 +325,7 @@ static const char *trace_options[] = {
"latency-format",
"global-clock",
"sleep-time",
"graph-time",
NULL
};
@@ -402,17 +411,6 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
return cnt;
}
static void
trace_print_seq(struct seq_file *m, struct trace_seq *s)
{
int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
s->buffer[len] = 0;
seq_puts(m, s->buffer);
trace_seq_init(s);
}
/**
* update_max_tr - snapshot all trace buffers from global_trace to max_tr
* @tr: tracer
@@ -641,6 +639,16 @@ void tracing_reset_online_cpus(struct trace_array *tr)
tracing_reset(tr, cpu);
}
void tracing_reset_current(int cpu)
{
tracing_reset(&global_trace, cpu);
}
void tracing_reset_current_online_cpus(void)
{
tracing_reset_online_cpus(&global_trace);
}
#define SAVED_CMDLINES 128
#define NO_CMDLINE_MAP UINT_MAX
static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
@@ -800,6 +808,7 @@ void trace_find_cmdline(int pid, char comm[])
return;
}
preempt_disable();
__raw_spin_lock(&trace_cmdline_lock);
map = map_pid_to_cmdline[pid];
if (map != NO_CMDLINE_MAP)
@@ -808,6 +817,7 @@ void trace_find_cmdline(int pid, char comm[])
strcpy(comm, "<...>");
__raw_spin_unlock(&trace_cmdline_lock);
preempt_enable();
}
void tracing_record_cmdline(struct task_struct *tsk)
@@ -840,7 +850,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
}
struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
unsigned char type,
int type,
unsigned long len,
unsigned long flags, int pc)
{
@@ -883,30 +893,40 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
}
struct ring_buffer_event *
trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
trace_current_buffer_lock_reserve(int type, unsigned long len,
unsigned long flags, int pc)
{
return trace_buffer_lock_reserve(&global_trace,
type, len, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
unsigned long flags, int pc)
{
return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
unsigned long flags, int pc)
{
return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
}
EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
{
ring_buffer_discard_commit(global_trace.buffer, event);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
void
trace_function(struct trace_array *tr,
unsigned long ip, unsigned long parent_ip, unsigned long flags,
int pc)
{
struct ftrace_event_call *call = &event_function;
struct ring_buffer_event *event;
struct ftrace_entry *entry;
@@ -921,7 +941,9 @@ trace_function(struct trace_array *tr,
entry = ring_buffer_event_data(event);
entry->ip = ip;
entry->parent_ip = parent_ip;
ring_buffer_unlock_commit(tr->buffer, event);
if (!filter_check_discard(call, entry, tr->buffer, event))
ring_buffer_unlock_commit(tr->buffer, event);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -930,6 +952,7 @@ static int __trace_graph_entry(struct trace_array *tr,
unsigned long flags,
int pc)
{
struct ftrace_event_call *call = &event_funcgraph_entry;
struct ring_buffer_event *event;
struct ftrace_graph_ent_entry *entry;
@@ -942,7 +965,8 @@ static int __trace_graph_entry(struct trace_array *tr,
return 0;
entry = ring_buffer_event_data(event);
entry->graph_ent = *trace;
ring_buffer_unlock_commit(global_trace.buffer, event);
if (!filter_current_check_discard(call, entry, event))
ring_buffer_unlock_commit(global_trace.buffer, event);
return 1;
}
@@ -952,6 +976,7 @@ static void __trace_graph_return(struct trace_array *tr,
unsigned long flags,
int pc)
{
struct ftrace_event_call *call = &event_funcgraph_exit;
struct ring_buffer_event *event;
struct ftrace_graph_ret_entry *entry;
@@ -964,7 +989,8 @@ static void __trace_graph_return(struct trace_array *tr,
return;
entry = ring_buffer_event_data(event);
entry->ret = *trace;
ring_buffer_unlock_commit(global_trace.buffer, event);
if (!filter_current_check_discard(call, entry, event))
ring_buffer_unlock_commit(global_trace.buffer, event);
}
#endif
@@ -982,6 +1008,7 @@ static void __ftrace_trace_stack(struct trace_array *tr,
int skip, int pc)
{
#ifdef CONFIG_STACKTRACE
struct ftrace_event_call *call = &event_kernel_stack;
struct ring_buffer_event *event;
struct stack_entry *entry;
struct stack_trace trace;
@@ -999,7 +1026,8 @@ static void __ftrace_trace_stack(struct trace_array *tr,
trace.entries = entry->caller;
save_stack_trace(&trace);
ring_buffer_unlock_commit(tr->buffer, event);
if (!filter_check_discard(call, entry, tr->buffer, event))
ring_buffer_unlock_commit(tr->buffer, event);
#endif
}
@@ -1024,6 +1052,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
unsigned long flags, int pc)
{
#ifdef CONFIG_STACKTRACE
struct ftrace_event_call *call = &event_user_stack;
struct ring_buffer_event *event;
struct userstack_entry *entry;
struct stack_trace trace;
@@ -1045,7 +1074,8 @@ static void ftrace_trace_userstack(struct trace_array *tr,
trace.entries = entry->caller;
save_stack_trace_user(&trace);
ring_buffer_unlock_commit(tr->buffer, event);
if (!filter_check_discard(call, entry, tr->buffer, event))
ring_buffer_unlock_commit(tr->buffer, event);
#endif
}
@@ -1089,6 +1119,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
struct task_struct *next,
unsigned long flags, int pc)
{
struct ftrace_event_call *call = &event_context_switch;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
@@ -1104,7 +1135,9 @@ tracing_sched_switch_trace(struct trace_array *tr,
entry->next_prio = next->prio;
entry->next_state = next->state;
entry->next_cpu = task_cpu(next);
trace_buffer_unlock_commit(tr, event, flags, pc);
if (!filter_check_discard(call, entry, tr->buffer, event))
trace_buffer_unlock_commit(tr, event, flags, pc);
}
void
@@ -1113,6 +1146,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
struct task_struct *curr,
unsigned long flags, int pc)
{
struct ftrace_event_call *call = &event_wakeup;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
@@ -1129,7 +1163,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
entry->next_state = wakee->state;
entry->next_cpu = task_cpu(wakee);
ring_buffer_unlock_commit(tr->buffer, event);
if (!filter_check_discard(call, entry, tr->buffer, event))
ring_buffer_unlock_commit(tr->buffer, event);
ftrace_trace_stack(tr, flags, 6, pc);
ftrace_trace_userstack(tr, flags, pc);
}
@@ -1230,11 +1265,13 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
static u32 trace_buf[TRACE_BUF_SIZE];
struct ftrace_event_call *call = &event_bprint;
struct ring_buffer_event *event;
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
struct bprint_entry *entry;
unsigned long flags;
int disable;
int resched;
int cpu, len = 0, size, pc;
@@ -1249,7 +1286,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
cpu = raw_smp_processor_id();
data = tr->data[cpu];
if (unlikely(atomic_read(&data->disabled)))
disable = atomic_inc_return(&data->disabled);
if (unlikely(disable != 1))
goto out;
/* Lockdep uses trace_printk for lock tracing */
@@ -1269,13 +1307,15 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
entry->fmt = fmt;
memcpy(entry->buf, trace_buf, sizeof(u32) * len);
ring_buffer_unlock_commit(tr->buffer, event);
if (!filter_check_discard(call, entry, tr->buffer, event))
ring_buffer_unlock_commit(tr->buffer, event);
out_unlock:
__raw_spin_unlock(&trace_buf_lock);
local_irq_restore(flags);
out:
atomic_dec_return(&data->disabled);
ftrace_preempt_enable(resched);
unpause_graph_tracing();
@@ -1288,12 +1328,14 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
static char trace_buf[TRACE_BUF_SIZE];
struct ftrace_event_call *call = &event_print;
struct ring_buffer_event *event;
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
int cpu, len = 0, size, pc;
struct print_entry *entry;
unsigned long irq_flags;
int disable;
if (tracing_disabled || tracing_selftest_running)
return 0;
@@ -1303,7 +1345,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
cpu = raw_smp_processor_id();
data = tr->data[cpu];
if (unlikely(atomic_read(&data->disabled)))
disable = atomic_inc_return(&data->disabled);
if (unlikely(disable != 1))
goto out;
pause_graph_tracing();
@@ -1323,13 +1366,15 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = 0;
ring_buffer_unlock_commit(tr->buffer, event);
if (!filter_check_discard(call, entry, tr->buffer, event))
ring_buffer_unlock_commit(tr->buffer, event);
out_unlock:
__raw_spin_unlock(&trace_buf_lock);
raw_local_irq_restore(irq_flags);
unpause_graph_tracing();
out:
atomic_dec_return(&data->disabled);
preempt_enable_notrace();
return len;
@@ -1526,12 +1571,14 @@ static void *s_start(struct seq_file *m, loff_t *pos)
p = s_next(m, p, &l);
}
trace_event_read_lock();
return p;
}
static void s_stop(struct seq_file *m, void *p)
{
atomic_dec(&trace_record_cmdline_disabled);
trace_event_read_unlock();
}
static void print_lat_help_header(struct seq_file *m)
@@ -1774,6 +1821,7 @@ static int trace_empty(struct trace_iterator *iter)
return 1;
}
/* Called with trace_event_read_lock() held. */
static enum print_line_t print_trace_line(struct trace_iterator *iter)
{
enum print_line_t ret;
@@ -2396,6 +2444,56 @@ static const struct file_operations tracing_readme_fops = {
.read = tracing_readme_read,
};
static ssize_t
tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char *buf_comm;
char *file_buf;
char *buf;
int len = 0;
int pid;
int i;
file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
if (!file_buf)
return -ENOMEM;
buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
if (!buf_comm) {
kfree(file_buf);
return -ENOMEM;
}
buf = file_buf;
for (i = 0; i < SAVED_CMDLINES; i++) {
int r;
pid = map_cmdline_to_pid[i];
if (pid == -1 || pid == NO_CMDLINE_MAP)
continue;
trace_find_cmdline(pid, buf_comm);
r = sprintf(buf, "%d %s\n", pid, buf_comm);
buf += r;
len += r;
}
len = simple_read_from_buffer(ubuf, cnt, ppos,
file_buf, len);
kfree(file_buf);
kfree(buf_comm);
return len;
}
static const struct file_operations tracing_saved_cmdlines_fops = {
.open = tracing_open_generic,
.read = tracing_saved_cmdlines_read,
};
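Each record emitted by tracing_saved_cmdlines_read() follows the sprintf() format "%d %s\n" shown above, so reading the new saved_cmdlines file yields lines such as (hypothetical values):

    4907 bash
    4910 modprobe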
static ssize_t
tracing_ctrl_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
@@ -2728,6 +2826,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
/* trace pipe does not show start of buffer */
cpumask_setall(iter->started);
if (trace_flags & TRACE_ITER_LATENCY_FMT)
iter->iter_flags |= TRACE_FILE_LAT_FMT;
iter->cpu_file = cpu_file;
iter->tr = &global_trace;
mutex_init(&iter->mutex);
@@ -2915,6 +3016,7 @@ waitagain:
offsetof(struct trace_iterator, seq));
iter->pos = -1;
trace_event_read_lock();
while (find_next_entry_inc(iter) != NULL) {
enum print_line_t ret;
int len = iter->seq.len;
@@ -2931,6 +3033,7 @@ waitagain:
if (iter->seq.len >= cnt)
break;
}
trace_event_read_unlock();
/* Now copy what we have to the user */
sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
@@ -3053,6 +3156,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
goto out_err;
}
trace_event_read_lock();
/* Fill as many pages as possible. */
for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
pages[i] = alloc_page(GFP_KERNEL);
@@ -3075,6 +3180,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
trace_seq_init(&iter->seq);
}
trace_event_read_unlock();
mutex_unlock(&iter->mutex);
spd.nr_pages = i;
@@ -3425,7 +3531,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
.spd_release = buffer_spd_release,
};
struct buffer_ref *ref;
int size, i;
int entries, size, i;
size_t ret;
if (*ppos & (PAGE_SIZE - 1)) {
@@ -3440,7 +3546,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
len &= PAGE_MASK;
}
for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
struct page *page;
int r;
@@ -3457,7 +3565,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
}
r = ring_buffer_read_page(ref->buffer, &ref->page,
len, info->cpu, 0);
len, info->cpu, 1);
if (r < 0) {
ring_buffer_free_read_page(ref->buffer,
ref->page);
@@ -3481,6 +3589,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
spd.partial[i].private = (unsigned long)ref;
spd.nr_pages++;
*ppos += PAGE_SIZE;
entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
}
spd.nr_pages = i;
@@ -3508,6 +3618,45 @@ static const struct file_operations tracing_buffers_fops = {
.llseek = no_llseek,
};
static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
unsigned long cpu = (unsigned long)filp->private_data;
struct trace_array *tr = &global_trace;
struct trace_seq *s;
unsigned long cnt;
s = kmalloc(sizeof(*s), GFP_ATOMIC);
if (!s)
return -ENOMEM;
trace_seq_init(s);
cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
trace_seq_printf(s, "entries: %ld\n", cnt);
cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
trace_seq_printf(s, "overrun: %ld\n", cnt);
cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
trace_seq_printf(s, "commit overrun: %ld\n", cnt);
cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
kfree(s);
return count;
}
static const struct file_operations tracing_stats_fops = {
.open = tracing_open_generic,
.read = tracing_stats_read,
};
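Given the trace_seq_printf() calls above, each per-cpu stats file reports four counters, for example (hypothetical values):

    entries: 129207
    overrun: 0
    commit overrun: 0
    nmi dropped: 0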
#ifdef CONFIG_DYNAMIC_FTRACE
int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3597,7 +3746,7 @@ struct dentry *tracing_dentry_percpu(void)
static void tracing_init_debugfs_percpu(long cpu)
{
struct dentry *d_percpu = tracing_dentry_percpu();
struct dentry *entry, *d_cpu;
struct dentry *d_cpu;
/* strlen(cpu) + MAX(log10(cpu)) + '\0' */
char cpu_dir[7];
@@ -3612,21 +3761,18 @@ static void tracing_init_debugfs_percpu(long cpu)
}
/* per cpu trace_pipe */
entry = debugfs_create_file("trace_pipe", 0444, d_cpu,
(void *) cpu, &tracing_pipe_fops);
if (!entry)
pr_warning("Could not create debugfs 'trace_pipe' entry\n");
trace_create_file("trace_pipe", 0444, d_cpu,
(void *) cpu, &tracing_pipe_fops);
/* per cpu trace */
entry = debugfs_create_file("trace", 0644, d_cpu,
(void *) cpu, &tracing_fops);
if (!entry)
pr_warning("Could not create debugfs 'trace' entry\n");
trace_create_file("trace", 0644, d_cpu,
(void *) cpu, &tracing_fops);
entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu,
(void *) cpu, &tracing_buffers_fops);
if (!entry)
pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n");
trace_create_file("trace_pipe_raw", 0444, d_cpu,
(void *) cpu, &tracing_buffers_fops);
trace_create_file("stats", 0444, d_cpu,
(void *) cpu, &tracing_stats_fops);
}
#ifdef CONFIG_FTRACE_SELFTEST
@@ -3782,6 +3928,22 @@ static const struct file_operations trace_options_core_fops = {
.write = trace_options_core_write,
};
struct dentry *trace_create_file(const char *name,
mode_t mode,
struct dentry *parent,
void *data,
const struct file_operations *fops)
{
struct dentry *ret;
ret = debugfs_create_file(name, mode, parent, data, fops);
if (!ret)
pr_warning("Could not create debugfs '%s' entry\n", name);
return ret;
}
static struct dentry *trace_options_init_dentry(void)
{
struct dentry *d_tracer;
@@ -3809,7 +3971,6 @@ create_trace_option_file(struct trace_option_dentry *topt,
struct tracer_opt *opt)
{
struct dentry *t_options;
struct dentry *entry;
t_options = trace_options_init_dentry();
if (!t_options)
@@ -3818,11 +3979,9 @@ create_trace_option_file(struct trace_option_dentry *topt,
topt->flags = flags;
topt->opt = opt;
entry = debugfs_create_file(opt->name, 0644, t_options, topt,
topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
&trace_options_fops);
topt->entry = entry;
}
static struct trace_option_dentry *
@@ -3877,123 +4036,84 @@ static struct dentry *
create_trace_option_core_file(const char *option, long index)
{
struct dentry *t_options;
struct dentry *entry;
t_options = trace_options_init_dentry();
if (!t_options)
return NULL;
entry = debugfs_create_file(option, 0644, t_options, (void *)index,
return trace_create_file(option, 0644, t_options, (void *)index,
&trace_options_core_fops);
return entry;
}
static __init void create_trace_options_dir(void)
{
struct dentry *t_options;
struct dentry *entry;
int i;
t_options = trace_options_init_dentry();
if (!t_options)
return;
for (i = 0; trace_options[i]; i++) {
entry = create_trace_option_core_file(trace_options[i], i);
if (!entry)
pr_warning("Could not create debugfs %s entry\n",
trace_options[i]);
}
for (i = 0; trace_options[i]; i++)
create_trace_option_core_file(trace_options[i], i);
}
static __init int tracer_init_debugfs(void)
{
struct dentry *d_tracer;
struct dentry *entry;
int cpu;
d_tracer = tracing_init_dentry();
entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
&global_trace, &tracing_ctrl_fops);
if (!entry)
pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
trace_create_file("tracing_enabled", 0644, d_tracer,
&global_trace, &tracing_ctrl_fops);
entry = debugfs_create_file("trace_options", 0644, d_tracer,
NULL, &tracing_iter_fops);
if (!entry)
pr_warning("Could not create debugfs 'trace_options' entry\n");
trace_create_file("trace_options", 0644, d_tracer,
NULL, &tracing_iter_fops);
create_trace_options_dir();
trace_create_file("tracing_cpumask", 0644, d_tracer,
NULL, &tracing_cpumask_fops);
entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
NULL, &tracing_cpumask_fops);
if (!entry)
pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
trace_create_file("trace", 0644, d_tracer,
(void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
entry = debugfs_create_file("trace", 0644, d_tracer,
(void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
if (!entry)
pr_warning("Could not create debugfs 'trace' entry\n");
trace_create_file("available_tracers", 0444, d_tracer,
&global_trace, &show_traces_fops);
entry = debugfs_create_file("available_tracers", 0444, d_tracer,
&global_trace, &show_traces_fops);
if (!entry)
pr_warning("Could not create debugfs 'available_tracers' entry\n");
trace_create_file("current_tracer", 0644, d_tracer,
&global_trace, &set_tracer_fops);
entry = debugfs_create_file("current_tracer", 0444, d_tracer,
&global_trace, &set_tracer_fops);
if (!entry)
pr_warning("Could not create debugfs 'current_tracer' entry\n");
trace_create_file("tracing_max_latency", 0644, d_tracer,
&tracing_max_latency, &tracing_max_lat_fops);
entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
&tracing_max_latency,
&tracing_max_lat_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'tracing_max_latency' entry\n");
trace_create_file("tracing_thresh", 0644, d_tracer,
&tracing_thresh, &tracing_max_lat_fops);
entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
&tracing_thresh, &tracing_max_lat_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'tracing_thresh' entry\n");
entry = debugfs_create_file("README", 0644, d_tracer,
NULL, &tracing_readme_fops);
if (!entry)
pr_warning("Could not create debugfs 'README' entry\n");
trace_create_file("README", 0444, d_tracer,
NULL, &tracing_readme_fops);
entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
trace_create_file("trace_pipe", 0444, d_tracer,
(void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'trace_pipe' entry\n");
entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
&global_trace, &tracing_entries_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'buffer_size_kb' entry\n");
trace_create_file("buffer_size_kb", 0644, d_tracer,
&global_trace, &tracing_entries_fops);
entry = debugfs_create_file("trace_marker", 0220, d_tracer,
NULL, &tracing_mark_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'trace_marker' entry\n");
trace_create_file("trace_marker", 0220, d_tracer,
NULL, &tracing_mark_fops);
trace_create_file("saved_cmdlines", 0444, d_tracer,
NULL, &tracing_saved_cmdlines_fops);
#ifdef CONFIG_DYNAMIC_FTRACE
entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt,
&tracing_dyn_info_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'dyn_ftrace_total_info' entry\n");
trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
#endif
#ifdef CONFIG_SYSPROF_TRACER
init_tracer_sysprof_debugfs(d_tracer);
#endif
create_trace_options_dir();
for_each_tracing_cpu(cpu)
tracing_init_debugfs_percpu(cpu);
@@ -4064,7 +4184,8 @@ trace_printk_seq(struct trace_seq *s)
static void __ftrace_dump(bool disable_tracing)
{
static DEFINE_SPINLOCK(ftrace_dump_lock);
static raw_spinlock_t ftrace_dump_lock =
(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
/* use static because iter can be a bit big for the stack */
static struct trace_iterator iter;
unsigned int old_userobj;
@@ -4073,7 +4194,8 @@ static void __ftrace_dump(bool disable_tracing)
int cnt = 0, cpu;
/* only one dump */
spin_lock_irqsave(&ftrace_dump_lock, flags);
local_irq_save(flags);
__raw_spin_lock(&ftrace_dump_lock);
if (dump_ran)
goto out;
@@ -4145,7 +4267,8 @@ static void __ftrace_dump(bool disable_tracing)
}
out:
spin_unlock_irqrestore(&ftrace_dump_lock, flags);
__raw_spin_unlock(&ftrace_dump_lock);
local_irq_restore(flags);
}
/* By default: disable tracing after the dump */

View File

@@ -9,9 +9,12 @@
#include <linux/mmiotrace.h>
#include <linux/ftrace.h>
#include <trace/boot.h>
#include <trace/kmemtrace.h>
#include <linux/kmemtrace.h>
#include <trace/power.h>
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -41,20 +44,6 @@ enum trace_type {
__TRACE_LAST_TYPE,
};
/*
* The trace entry - the most basic unit of tracing. This is what
* is printed in the end as a single line in the trace output, such as:
*
* bash-15816 [01] 235.197585: idle_cpu <- irq_enter
*/
struct trace_entry {
unsigned char type;
unsigned char flags;
unsigned char preempt_count;
int pid;
int tgid;
};
/*
* Function trace entry - function address and parent function address:
*/
@@ -263,8 +252,6 @@ struct trace_array_cpu {
char comm[TASK_COMM_LEN];
};
struct trace_iterator;
/*
* The trace array - an array of per-CPU trace arrays. This is the
* highest level data structure that individual tracers deal with.
@@ -339,15 +326,6 @@ extern void __ftrace_bad_type(void);
__ftrace_bad_type(); \
} while (0)
/* Return values for print_line callback */
enum print_line_t {
TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
TRACE_TYPE_HANDLED = 1,
TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */
TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
};
/*
* An option specific to a tracer. This is a boolean value.
* The bit is the bit index that sets its value on the
@@ -423,60 +401,30 @@ struct tracer {
struct tracer_stat *stats;
};
struct trace_seq {
unsigned char buffer[PAGE_SIZE];
unsigned int len;
unsigned int readpos;
};
static inline void
trace_seq_init(struct trace_seq *s)
{
s->len = 0;
s->readpos = 0;
}
#define TRACE_PIPE_ALL_CPU -1
/*
* Trace iterator - used by printout routines who present trace
* results to users and which routines might sleep, etc:
*/
struct trace_iterator {
struct trace_array *tr;
struct tracer *trace;
void *private;
int cpu_file;
struct mutex mutex;
struct ring_buffer_iter *buffer_iter[NR_CPUS];
/* The below is zeroed out in pipe_read */
struct trace_seq seq;
struct trace_entry *ent;
int cpu;
u64 ts;
unsigned long iter_flags;
loff_t pos;
long idx;
cpumask_var_t started;
};
int tracer_init(struct tracer *t, struct trace_array *tr);
int tracing_is_enabled(void);
void trace_wake_up(void);
void tracing_reset(struct trace_array *tr, int cpu);
void tracing_reset_online_cpus(struct trace_array *tr);
void tracing_reset_current(int cpu);
void tracing_reset_current_online_cpus(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
struct dentry *trace_create_file(const char *name,
mode_t mode,
struct dentry *parent,
void *data,
const struct file_operations *fops);
struct dentry *tracing_init_dentry(void);
void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
struct ring_buffer_event;
struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
unsigned char type,
int type,
unsigned long len,
unsigned long flags,
int pc);
@@ -484,14 +432,6 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
struct ring_buffer_event *event,
unsigned long flags, int pc);
struct ring_buffer_event *
trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
unsigned long flags, int pc);
void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
unsigned long flags, int pc);
void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
unsigned long flags, int pc);
struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
struct trace_array_cpu *data);
@@ -514,7 +454,6 @@ void tracing_sched_switch_trace(struct trace_array *tr,
struct task_struct *prev,
struct task_struct *next,
unsigned long flags, int pc);
void tracing_record_cmdline(struct task_struct *tsk);
void tracing_sched_wakeup_trace(struct trace_array *tr,
struct task_struct *wakee,
@@ -599,6 +538,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_hw_branches(struct tracer *trace,
struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
@@ -613,6 +554,8 @@ extern unsigned long trace_flags;
/* Standard output formatting function used for function return traces */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
extern enum print_line_t print_graph_function(struct trace_iterator *iter);
extern enum print_line_t
trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
#ifdef CONFIG_DYNAMIC_FTRACE
/* TODO: make this variable */
@@ -644,7 +587,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
return 1;
}
#endif /* CONFIG_DYNAMIC_FTRACE */
#else /* CONFIG_FUNCTION_GRAPH_TRACER */
static inline enum print_line_t
print_graph_function(struct trace_iterator *iter)
@@ -692,6 +634,7 @@ enum trace_iterator_flags {
TRACE_ITER_LATENCY_FMT = 0x40000,
TRACE_ITER_GLOBAL_CLK = 0x80000,
TRACE_ITER_SLEEP_TIME = 0x100000,
TRACE_ITER_GRAPH_TIME = 0x200000,
};
/*
@@ -790,103 +733,113 @@ struct ftrace_event_field {
char *type;
int offset;
int size;
int is_signed;
};
struct ftrace_event_call {
char *name;
char *system;
struct dentry *dir;
int enabled;
int (*regfunc)(void);
void (*unregfunc)(void);
int id;
int (*raw_init)(void);
int (*show_format)(struct trace_seq *s);
int (*define_fields)(void);
struct list_head fields;
struct event_filter {
int n_preds;
struct filter_pred **preds;
#ifdef CONFIG_EVENT_PROFILE
atomic_t profile_count;
int (*profile_enable)(struct ftrace_event_call *);
void (*profile_disable)(struct ftrace_event_call *);
#endif
char *filter_string;
};
struct event_subsystem {
struct list_head list;
const char *name;
struct dentry *entry;
struct filter_pred **preds;
void *filter;
};
#define events_for_each(event) \
for (event = __start_ftrace_events; \
(unsigned long)event < (unsigned long)__stop_ftrace_events; \
event++)
#define MAX_FILTER_PRED 8
struct filter_pred;
typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
int val1, int val2);
struct filter_pred {
filter_pred_fn_t fn;
u64 val;
char *str_val;
char str_val[MAX_FILTER_STR_VAL];
int str_len;
char *field_name;
int offset;
int not;
int or;
int compound;
int clear;
int op;
int pop_n;
};
int trace_define_field(struct ftrace_event_call *call, char *type,
char *name, int offset, int size);
extern void filter_free_pred(struct filter_pred *pred);
extern void filter_print_preds(struct filter_pred **preds,
extern void print_event_filter(struct ftrace_event_call *call,
struct trace_seq *s);
extern int filter_parse(char **pbuf, struct filter_pred *pred);
extern int filter_add_pred(struct ftrace_event_call *call,
struct filter_pred *pred);
extern void filter_free_preds(struct ftrace_event_call *call);
extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
extern void filter_free_subsystem_preds(struct event_subsystem *system);
extern int filter_add_subsystem_pred(struct event_subsystem *system,
struct filter_pred *pred);
extern int apply_event_filter(struct ftrace_event_call *call,
char *filter_string);
extern int apply_subsystem_event_filter(struct event_subsystem *system,
char *filter_string);
extern void print_subsystem_event_filter(struct event_subsystem *system,
struct trace_seq *s);
void event_trace_printk(unsigned long ip, const char *fmt, ...);
extern struct ftrace_event_call __start_ftrace_events[];
extern struct ftrace_event_call __stop_ftrace_events[];
static inline int
filter_check_discard(struct ftrace_event_call *call, void *rec,
struct ring_buffer *buffer,
struct ring_buffer_event *event)
{
if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
ring_buffer_discard_commit(buffer, event);
return 1;
}
#define for_each_event(event) \
for (event = __start_ftrace_events; \
(unsigned long)event < (unsigned long)__stop_ftrace_events; \
event++)
return 0;
}
#define DEFINE_COMPARISON_PRED(type) \
static int filter_pred_##type(struct filter_pred *pred, void *event, \
int val1, int val2) \
{ \
type *addr = (type *)(event + pred->offset); \
type val = (type)pred->val; \
int match = 0; \
\
switch (pred->op) { \
case OP_LT: \
match = (*addr < val); \
break; \
case OP_LE: \
match = (*addr <= val); \
break; \
case OP_GT: \
match = (*addr > val); \
break; \
case OP_GE: \
match = (*addr >= val); \
break; \
default: \
break; \
} \
\
return match; \
}
#define DEFINE_EQUALITY_PRED(size) \
static int filter_pred_##size(struct filter_pred *pred, void *event, \
int val1, int val2) \
{ \
u##size *addr = (u##size *)(event + pred->offset); \
u##size val = (u##size)pred->val; \
int match; \
\
match = (val == *addr) ^ pred->not; \
\
return match; \
}
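As an illustration of the comparison macro (the concrete DEFINE_COMPARISON_PRED invocations live in the filter code and are not part of this hunk; s64 is just a hypothetical instantiation), expanding it by plain substitution gives:

static int filter_pred_s64(struct filter_pred *pred, void *event,
			   int val1, int val2)
{
	s64 *addr = (s64 *)(event + pred->offset);
	s64 val = (s64)pred->val;
	int match = 0;

	switch (pred->op) {
	case OP_LT:
		match = (*addr < val);
		break;
	case OP_LE:
		match = (*addr <= val);
		break;
	case OP_GT:
		match = (*addr > val);
		break;
	case OP_GE:
		match = (*addr >= val);
		break;
	default:
		break;
	}

	return match;
}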
extern struct mutex event_mutex;
extern struct list_head ftrace_events;
extern const char *__start___trace_bprintk_fmt[];
extern const char *__stop___trace_bprintk_fmt[];
/*
* The double __builtin_constant_p is because gcc will give us an error
* if we try to allocate the static variable to fmt if it is not a
* constant. Even with the outer if statement optimizing out.
*/
#define event_trace_printk(ip, fmt, args...) \
do { \
__trace_printk_check_format(fmt, ##args); \
tracing_record_cmdline(current); \
if (__builtin_constant_p(fmt)) { \
static const char *trace_printk_fmt \
__attribute__((section("__trace_printk_fmt"))) = \
__builtin_constant_p(fmt) ? fmt : NULL; \
\
__trace_bprintk(ip, trace_printk_fmt, ##args); \
} else \
__trace_printk(ip, fmt, ##args); \
} while (0)
#undef TRACE_EVENT_FORMAT
#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
extern struct ftrace_event_call event_##call;
#undef TRACE_EVENT_FORMAT_NOFILTER
#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt)
#include "trace_event_types.h"
#endif /* _LINUX_KERNEL_TRACE_H */

View File

@@ -9,6 +9,7 @@
#include <linux/debugfs.h>
#include <linux/ftrace.h>
#include <linux/kallsyms.h>
#include <linux/time.h>
#include "trace.h"
#include "trace_output.h"
@@ -67,7 +68,7 @@ initcall_call_print_line(struct trace_iterator *iter)
trace_assign_type(field, entry);
call = &field->boot_call;
ts = iter->ts;
nsec_rem = do_div(ts, 1000000000);
nsec_rem = do_div(ts, NSEC_PER_SEC);
ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
(unsigned long)ts, nsec_rem, call->func, call->caller);
@@ -92,7 +93,7 @@ initcall_ret_print_line(struct trace_iterator *iter)
trace_assign_type(field, entry);
init_ret = &field->boot_ret;
ts = iter->ts;
nsec_rem = do_div(ts, 1000000000);
nsec_rem = do_div(ts, NSEC_PER_SEC);
ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
"returned %d after %llu msecs\n",

View File

@@ -30,6 +30,7 @@ static struct trace_array *branch_tracer;
static void
probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
{
struct ftrace_event_call *call = &event_branch;
struct trace_array *tr = branch_tracer;
struct ring_buffer_event *event;
struct trace_branch *entry;
@@ -73,7 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
entry->line = f->line;
entry->correct = val == expect;
ring_buffer_unlock_commit(tr->buffer, event);
if (!filter_check_discard(call, entry, tr->buffer, event))
ring_buffer_unlock_commit(tr->buffer, event);
out:
atomic_dec(&tr->data[cpu]->disabled);
@@ -271,7 +273,7 @@ static int branch_stat_show(struct seq_file *m, void *v)
return 0;
}
static void *annotated_branch_stat_start(void)
static void *annotated_branch_stat_start(struct tracer_stat *trace)
{
return __start_annotated_branch_profile;
}
@@ -346,7 +348,7 @@ static int all_branch_stat_headers(struct seq_file *m)
return 0;
}
static void *all_branch_stat_start(void)
static void *all_branch_stat_start(struct tracer_stat *trace)
{
return __start_branch_profile;
}

View File

@@ -10,22 +10,30 @@
int ftrace_profile_enable(int event_id)
{
struct ftrace_event_call *event;
int ret = -EINVAL;
for_each_event(event) {
if (event->id == event_id)
return event->profile_enable(event);
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
if (event->id == event_id) {
ret = event->profile_enable(event);
break;
}
}
mutex_unlock(&event_mutex);
return -EINVAL;
return ret;
}
void ftrace_profile_disable(int event_id)
{
struct ftrace_event_call *event;
for_each_event(event) {
if (event->id == event_id)
return event->profile_disable(event);
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
if (event->id == event_id) {
event->profile_disable(event);
break;
}
}
mutex_unlock(&event_mutex);
}

View File

@@ -57,7 +57,7 @@ TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore,
TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
);
TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore,
TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore,
TRACE_STRUCT(
TRACE_FIELD(unsigned long, arg1, arg1)
TRACE_FIELD(unsigned long, arg2, arg2)
@@ -122,8 +122,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
TRACE_STRUCT(
TRACE_FIELD(unsigned int, line, line)
TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func)
TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file)
TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func,
TRACE_FUNC_SIZE+1, func)
TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file,
TRACE_FUNC_SIZE+1, file)
TRACE_FIELD(char, correct, correct)
),
TP_RAW_FMT("%u:%s:%s (%u)")
@@ -139,8 +141,8 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore,
TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
TRACE_STRUCT(
TRACE_FIELD(ktime_t, state_data.stamp, stamp)
TRACE_FIELD(ktime_t, state_data.end, end)
TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1)
TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1)
TRACE_FIELD(int, state_data.type, type)
TRACE_FIELD(int, state_data.state, state)
),

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,39 +0,0 @@
/*
* Stage 1 of the trace events.
*
* Override the macros in <trace/trace_event_types.h> to include the following:
*
* struct ftrace_raw_<call> {
* struct trace_entry ent;
* <type> <item>;
* <type2> <item2>[<len>];
* [...]
* };
*
* The <type> <item> is created by the __field(type, item) macro or
* the __array(type2, item2, len) macro.
* We simply do "type item;", and that will create the fields
* in the structure.
*/
#undef TRACE_FORMAT
#define TRACE_FORMAT(call, proto, args, fmt)
#undef __array
#define __array(type, item, len) type item[len];
#undef __field
#define __field(type, item) type item;
#undef TP_STRUCT__entry
#define TP_STRUCT__entry(args...) args
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
struct ftrace_raw_##name { \
struct trace_entry ent; \
tstruct \
}; \
static struct ftrace_event_call event_##name
#include <trace/trace_event_types.h>
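To make the stage-1 expansion described in the header comment concrete, a hypothetical TRACE_EVENT(my_event, ...) whose TP_STRUCT__entry contains __field(int, foo) and __array(char, comm, 16) would generate roughly:

struct ftrace_raw_my_event {
	struct trace_entry	ent;
	int			foo;
	char			comm[16];
};

static struct ftrace_event_call event_my_event;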

View File

@@ -1,176 +0,0 @@
/*
* Stage 2 of the trace events.
*
* Override the macros in <trace/trace_event_types.h> to include the following:
*
* enum print_line_t
* ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
* {
* struct trace_seq *s = &iter->seq;
* struct ftrace_raw_<call> *field; <-- defined in stage 1
* struct trace_entry *entry;
* int ret;
*
* entry = iter->ent;
*
* if (entry->type != event_<call>.id) {
* WARN_ON_ONCE(1);
* return TRACE_TYPE_UNHANDLED;
* }
*
* field = (typeof(field))entry;
*
* ret = trace_seq_printf(s, <TP_printk> "\n");
* if (!ret)
* return TRACE_TYPE_PARTIAL_LINE;
*
* return TRACE_TYPE_HANDLED;
* }
*
* This is the method used to print the raw event to the trace
* output format. Note, this is not needed if the data is read
* in binary.
*/
#undef __entry
#define __entry field
#undef TP_printk
#define TP_printk(fmt, args...) fmt "\n", args
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
enum print_line_t \
ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
{ \
struct trace_seq *s = &iter->seq; \
struct ftrace_raw_##call *field; \
struct trace_entry *entry; \
int ret; \
\
entry = iter->ent; \
\
if (entry->type != event_##call.id) { \
WARN_ON_ONCE(1); \
return TRACE_TYPE_UNHANDLED; \
} \
\
field = (typeof(field))entry; \
\
ret = trace_seq_printf(s, #call ": " print); \
if (!ret) \
return TRACE_TYPE_PARTIAL_LINE; \
\
return TRACE_TYPE_HANDLED; \
}
#include <trace/trace_event_types.h>
/*
* Setup the showing format of trace point.
*
* int
* ftrace_format_##call(struct trace_seq *s)
* {
* struct ftrace_raw_##call field;
* int ret;
*
* ret = trace_seq_printf(s, #type " " #item ";"
* " offset:%u; size:%u;\n",
* offsetof(struct ftrace_raw_##call, item),
* sizeof(field.type));
*
* }
*/
#undef TP_STRUCT__entry
#define TP_STRUCT__entry(args...) args
#undef __field
#define __field(type, item) \
ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
"offset:%u;\tsize:%u;\n", \
(unsigned int)offsetof(typeof(field), item), \
(unsigned int)sizeof(field.item)); \
if (!ret) \
return 0;
#undef __array
#define __array(type, item, len) \
ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
"offset:%u;\tsize:%u;\n", \
(unsigned int)offsetof(typeof(field), item), \
(unsigned int)sizeof(field.item)); \
if (!ret) \
return 0;
#undef __entry
#define __entry REC
#undef TP_printk
#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
#undef TP_fast_assign
#define TP_fast_assign(args...) args
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
static int \
ftrace_format_##call(struct trace_seq *s) \
{ \
struct ftrace_raw_##call field; \
int ret; \
\
tstruct; \
\
trace_seq_printf(s, "\nprint fmt: " print); \
\
return ret; \
}
#include <trace/trace_event_types.h>
#undef __field
#define __field(type, item) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
sizeof(field.item)); \
if (ret) \
return ret;
#undef __array
#define __array(type, item, len) \
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
offsetof(typeof(field), item), \
sizeof(field.item)); \
if (ret) \
return ret;
#define __common_field(type, item) \
ret = trace_define_field(event_call, #type, "common_" #item, \
offsetof(typeof(field.ent), item), \
sizeof(field.ent.item)); \
if (ret) \
return ret;
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
int \
ftrace_define_fields_##call(void) \
{ \
struct ftrace_raw_##call field; \
struct ftrace_event_call *event_call = &event_##call; \
int ret; \
\
__common_field(unsigned char, type); \
__common_field(unsigned char, flags); \
__common_field(unsigned char, preempt_count); \
__common_field(int, pid); \
__common_field(int, tgid); \
\
tstruct; \
\
return ret; \
}
#include <trace/trace_event_types.h>
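Continuing the hypothetical my_event example from the stage-1 sketch above, the define-fields macros would expand to roughly the following (illustrative only, abbreviated for the remaining common fields):

int
ftrace_define_fields_my_event(void)
{
	struct ftrace_raw_my_event field;
	struct ftrace_event_call *event_call = &event_my_event;
	int ret;

	/* __common_field() is expanded once per struct trace_entry member */
	ret = trace_define_field(event_call, "unsigned char", "common_type",
				 offsetof(typeof(field.ent), type),
				 sizeof(field.ent.type));
	if (ret)
		return ret;
	/* ... and likewise for common_flags, common_preempt_count,
	 *     common_pid and common_tgid ... */

	/* __field(int, foo) */
	ret = trace_define_field(event_call, "int", "foo",
				 offsetof(typeof(field), foo),
				 sizeof(field.foo));
	if (ret)
		return ret;

	/* __array(char, comm, 16) */
	ret = trace_define_field(event_call, "char[16]", "comm",
				 offsetof(typeof(field), comm),
				 sizeof(field.comm));
	if (ret)
		return ret;

	return ret;
}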

View File

@@ -1,281 +0,0 @@
/*
* Stage 3 of the trace events.
*
* Override the macros in <trace/trace_event_types.h> to include the following:
*
* static void ftrace_event_<call>(proto)
* {
* event_trace_printk(_RET_IP_, "<call>: " <fmt>);
* }
*
* static int ftrace_reg_event_<call>(void)
* {
* int ret;
*
* ret = register_trace_<call>(ftrace_event_<call>);
* if (!ret)
* pr_info("event trace: Could not activate trace point "
* "probe to <call>");
* return ret;
* }
*
* static void ftrace_unreg_event_<call>(void)
* {
* unregister_trace_<call>(ftrace_event_<call>);
* }
*
* For those macros defined with TRACE_FORMAT:
*
* static struct ftrace_event_call __used
* __attribute__((__aligned__(4)))
* __attribute__((section("_ftrace_events"))) event_<call> = {
* .name = "<call>",
* .regfunc = ftrace_reg_event_<call>,
* .unregfunc = ftrace_unreg_event_<call>,
* }
*
*
* For those macros defined with TRACE_EVENT:
*
* static struct ftrace_event_call event_<call>;
*
* static void ftrace_raw_event_<call>(proto)
* {
* struct ring_buffer_event *event;
* struct ftrace_raw_<call> *entry; <-- defined in stage 1
* unsigned long irq_flags;
* int pc;
*
* local_save_flags(irq_flags);
* pc = preempt_count();
*
* event = trace_current_buffer_lock_reserve(event_<call>.id,
* sizeof(struct ftrace_raw_<call>),
* irq_flags, pc);
* if (!event)
* return;
* entry = ring_buffer_event_data(event);
*
* <assign>; <-- Here we assign the entries by the __field and
* __array macros.
*
* trace_current_buffer_unlock_commit(event, irq_flags, pc);
* }
*
* static int ftrace_raw_reg_event_<call>(void)
* {
* int ret;
*
* ret = register_trace_<call>(ftrace_raw_event_<call>);
* if (!ret)
* pr_info("event trace: Could not activate trace point "
* "probe to <call>");
* return ret;
* }
*
* static void ftrace_unreg_event_<call>(void)
* {
* unregister_trace_<call>(ftrace_raw_event_<call>);
* }
*
* static struct trace_event ftrace_event_type_<call> = {
* .trace = ftrace_raw_output_<call>, <-- stage 2
* };
*
* static int ftrace_raw_init_event_<call>(void)
* {
* int id;
*
* id = register_ftrace_event(&ftrace_event_type_<call>);
* if (!id)
* return -ENODEV;
* event_<call>.id = id;
* return 0;
* }
*
* static struct ftrace_event_call __used
* __attribute__((__aligned__(4)))
* __attribute__((section("_ftrace_events"))) event_<call> = {
* .name = "<call>",
* .system = "<system>",
* .raw_init = ftrace_raw_init_event_<call>,
* .regfunc = ftrace_reg_event_<call>,
* .unregfunc = ftrace_unreg_event_<call>,
* .show_format = ftrace_format_<call>,
* }
*
*/
#undef TP_FMT
#define TP_FMT(fmt, args...) fmt "\n", ##args
#ifdef CONFIG_EVENT_PROFILE
#define _TRACE_PROFILE(call, proto, args) \
static void ftrace_profile_##call(proto) \
{ \
extern void perf_tpcounter_event(int); \
perf_tpcounter_event(event_##call.id); \
} \
\
static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
{ \
int ret = 0; \
\
if (!atomic_inc_return(&call->profile_count)) \
ret = register_trace_##call(ftrace_profile_##call); \
\
return ret; \
} \
\
static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
{ \
if (atomic_add_negative(-1, &call->profile_count)) \
unregister_trace_##call(ftrace_profile_##call); \
}
#define _TRACE_PROFILE_INIT(call) \
.profile_count = ATOMIC_INIT(-1), \
.profile_enable = ftrace_profile_enable_##call, \
.profile_disable = ftrace_profile_disable_##call,
#else
#define _TRACE_PROFILE(call, proto, args)
#define _TRACE_PROFILE_INIT(call)
#endif
#define _TRACE_FORMAT(call, proto, args, fmt) \
static void ftrace_event_##call(proto) \
{ \
event_trace_printk(_RET_IP_, #call ": " fmt); \
} \
\
static int ftrace_reg_event_##call(void) \
{ \
int ret; \
\
ret = register_trace_##call(ftrace_event_##call); \
if (ret) \
pr_info("event trace: Could not activate trace point " \
"probe to " #call "\n"); \
return ret; \
} \
\
static void ftrace_unreg_event_##call(void) \
{ \
unregister_trace_##call(ftrace_event_##call); \
} \
\
static struct ftrace_event_call event_##call; \
\
static int ftrace_init_event_##call(void) \
{ \
int id; \
\
id = register_ftrace_event(NULL); \
if (!id) \
return -ENODEV; \
event_##call.id = id; \
return 0; \
}
#undef TRACE_FORMAT
#define TRACE_FORMAT(call, proto, args, fmt) \
_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
static struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) event_##call = { \
.name = #call, \
.system = __stringify(TRACE_SYSTEM), \
.raw_init = ftrace_init_event_##call, \
.regfunc = ftrace_reg_event_##call, \
.unregfunc = ftrace_unreg_event_##call, \
_TRACE_PROFILE_INIT(call) \
}
#undef __entry
#define __entry entry
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
\
static struct ftrace_event_call event_##call; \
\
static void ftrace_raw_event_##call(proto) \
{ \
struct ftrace_event_call *call = &event_##call; \
struct ring_buffer_event *event; \
struct ftrace_raw_##call *entry; \
unsigned long irq_flags; \
int pc; \
\
local_save_flags(irq_flags); \
pc = preempt_count(); \
\
event = trace_current_buffer_lock_reserve(event_##call.id, \
sizeof(struct ftrace_raw_##call), \
irq_flags, pc); \
if (!event) \
return; \
entry = ring_buffer_event_data(event); \
\
assign; \
\
if (call->preds && !filter_match_preds(call, entry)) \
ring_buffer_event_discard(event); \
\
trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
\
} \
\
static int ftrace_raw_reg_event_##call(void) \
{ \
int ret; \
\
ret = register_trace_##call(ftrace_raw_event_##call); \
if (ret) \
pr_info("event trace: Could not activate trace point " \
"probe to " #call "\n"); \
return ret; \
} \
\
static void ftrace_raw_unreg_event_##call(void) \
{ \
unregister_trace_##call(ftrace_raw_event_##call); \
} \
\
static struct trace_event ftrace_event_type_##call = { \
.trace = ftrace_raw_output_##call, \
}; \
\
static int ftrace_raw_init_event_##call(void) \
{ \
int id; \
\
id = register_ftrace_event(&ftrace_event_type_##call); \
if (!id) \
return -ENODEV; \
event_##call.id = id; \
INIT_LIST_HEAD(&event_##call.fields); \
return 0; \
} \
\
static struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) event_##call = { \
.name = #call, \
.system = __stringify(TRACE_SYSTEM), \
.raw_init = ftrace_raw_init_event_##call, \
.regfunc = ftrace_raw_reg_event_##call, \
.unregfunc = ftrace_raw_unreg_event_##call, \
.show_format = ftrace_format_##call, \
.define_fields = ftrace_define_fields_##call, \
_TRACE_PROFILE_INIT(call) \
}
#include <trace/trace_event_types.h>
#undef _TRACE_PROFILE
#undef _TRACE_PROFILE_INIT
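
For orientation, a hypothetical TRACE_EVENT() definition of the kind this stage-3 machinery expands; the event name and fields below are invented for illustration and are not part of this patch:

TRACE_EVENT(demo_event,

	TP_PROTO(int cpu, unsigned long count),

	TP_ARGS(cpu, count),

	TP_STRUCT__entry(
		__field(int,		cpu)
		__field(unsigned long,	count)
	),

	TP_fast_assign(
		__entry->cpu	= cpu;
		__entry->count	= count;
	),

	TP_printk("cpu=%d count=%lu", __entry->cpu, __entry->count)
);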

View File

@@ -19,8 +19,12 @@
#undef TRACE_STRUCT
#define TRACE_STRUCT(args...) args
extern void __bad_type_size(void);
#undef TRACE_FIELD
#define TRACE_FIELD(type, item, assign) \
if (sizeof(type) != sizeof(field.item)) \
__bad_type_size(); \
ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
"offset:%u;\tsize:%u;\n", \
(unsigned int)offsetof(typeof(field), item), \
@@ -30,7 +34,7 @@
#undef TRACE_FIELD_SPECIAL
#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \
"offset:%u;\tsize:%u;\n", \
(unsigned int)offsetof(typeof(field), item), \
@@ -46,6 +50,9 @@
if (!ret) \
return 0;
#undef TRACE_FIELD_SIGN
#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
TRACE_FIELD(type, item, assign)
#undef TP_RAW_FMT
#define TP_RAW_FMT(args...) args
@@ -65,6 +72,22 @@ ftrace_format_##call(struct trace_seq *s) \
return ret; \
}
#undef TRACE_EVENT_FORMAT_NOFILTER
#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
tpfmt) \
static int \
ftrace_format_##call(struct trace_seq *s) \
{ \
struct args field; \
int ret; \
\
tstruct; \
\
trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \
\
return ret; \
}
#include "trace_event_types.h"
#undef TRACE_ZERO_CHAR
@@ -78,6 +101,10 @@ ftrace_format_##call(struct trace_seq *s) \
#define TRACE_FIELD(type, item, assign)\
entry->item = assign;
#undef TRACE_FIELD_SIGN
#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
TRACE_FIELD(type, item, assign)
#undef TP_CMD
#define TP_CMD(cmd...) cmd
@@ -85,18 +112,95 @@ ftrace_format_##call(struct trace_seq *s) \
#define TRACE_ENTRY entry
#undef TRACE_FIELD_SPECIAL
#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
cmd;
#undef TRACE_EVENT_FORMAT
#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
int ftrace_define_fields_##call(void); \
static int ftrace_raw_init_event_##call(void); \
\
static struct ftrace_event_call __used \
struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) event_##call = { \
.name = #call, \
.id = proto, \
.system = __stringify(TRACE_SYSTEM), \
.raw_init = ftrace_raw_init_event_##call, \
.show_format = ftrace_format_##call, \
.define_fields = ftrace_define_fields_##call, \
}; \
static int ftrace_raw_init_event_##call(void) \
{ \
INIT_LIST_HEAD(&event_##call.fields); \
init_preds(&event_##call); \
return 0; \
} \
#undef TRACE_EVENT_FORMAT_NOFILTER
#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
tpfmt) \
\
struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) event_##call = { \
.name = #call, \
.id = proto, \
.system = __stringify(TRACE_SYSTEM), \
.show_format = ftrace_format_##call, \
}
};
#include "trace_event_types.h"
#undef TRACE_FIELD
#define TRACE_FIELD(type, item, assign) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), is_signed_type(type)); \
if (ret) \
return ret;
#undef TRACE_FIELD_SPECIAL
#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
offsetof(typeof(field), item), \
sizeof(field.item), 0); \
if (ret) \
return ret;
#undef TRACE_FIELD_SIGN
#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), is_signed); \
if (ret) \
return ret;
#undef TRACE_FIELD_ZERO_CHAR
#define TRACE_FIELD_ZERO_CHAR(item)
#undef TRACE_EVENT_FORMAT
#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
int \
ftrace_define_fields_##call(void) \
{ \
struct ftrace_event_call *event_call = &event_##call; \
struct args field; \
int ret; \
\
__common_field(unsigned char, type, 0); \
__common_field(unsigned char, flags, 0); \
__common_field(unsigned char, preempt_count, 0); \
__common_field(int, pid, 1); \
__common_field(int, tgid, 1); \
\
tstruct; \
\
return ret; \
}
#undef TRACE_EVENT_FORMAT_NOFILTER
#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
tpfmt)
#include "trace_event_types.h"

View File

@@ -65,6 +65,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
if (!current->ret_stack)
return -EBUSY;
/*
* We must make sure the ret_stack is tested before we read
* anything else.
*/
smp_rmb();
/* The return trace stack is full */
if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
atomic_inc(&current->trace_overrun);
@@ -78,13 +84,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
current->ret_stack[index].ret = ret;
current->ret_stack[index].func = func;
current->ret_stack[index].calltime = calltime;
current->ret_stack[index].subtime = 0;
*depth = index;
return 0;
}
/* Retrieve a function return address from the trace stack on thread info. */
void
static void
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
{
int index;
@@ -104,9 +111,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
trace->calltime = current->ret_stack[index].calltime;
trace->overrun = atomic_read(&current->trace_overrun);
trace->depth = index;
barrier();
current->curr_ret_stack--;
}
/*
@@ -121,6 +125,8 @@ unsigned long ftrace_return_to_handler(void)
ftrace_pop_return_trace(&trace, &ret);
trace.rettime = trace_clock_local();
ftrace_graph_return(&trace);
barrier();
current->curr_ret_stack--;
if (unlikely(!ret)) {
ftrace_graph_stop();
@@ -426,8 +432,8 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
return TRACE_TYPE_HANDLED;
}
static enum print_line_t
print_graph_duration(unsigned long long duration, struct trace_seq *s)
enum print_line_t
trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
{
unsigned long nsecs_rem = do_div(duration, 1000);
/* log10(ULONG_MAX) + '\0' */
@@ -464,12 +470,23 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
}
return TRACE_TYPE_HANDLED;
}
static enum print_line_t
print_graph_duration(unsigned long long duration, struct trace_seq *s)
{
int ret;
ret = trace_print_graph_duration(duration, s);
if (ret != TRACE_TYPE_HANDLED)
return ret;
ret = trace_seq_printf(s, "| ");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
return TRACE_TYPE_HANDLED;
}
/* Case of a leaf function on its call entry */

View File

@@ -1,10 +1,9 @@
/*
* h/w branch tracer for x86 based on bts
* h/w branch tracer for x86 based on BTS
*
* Copyright (C) 2008-2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
*/
#include <linux/spinlock.h>
#include <linux/kallsyms.h>
#include <linux/debugfs.h>
#include <linux/ftrace.h>
@@ -15,110 +14,119 @@
#include <asm/ds.h>
#include "trace.h"
#include "trace_output.h"
#include "trace.h"
#define SIZEOF_BTS (1 << 13)
#define BTS_BUFFER_SIZE (1 << 13)
/*
* The tracer lock protects the below per-cpu tracer array.
* It needs to be held to:
* - start tracing on all cpus
* - stop tracing on all cpus
* - start tracing on a single hotplug cpu
* - stop tracing on a single hotplug cpu
* - read the trace from all cpus
* - read the trace from a single cpu
*/
static DEFINE_SPINLOCK(bts_tracer_lock);
static DEFINE_PER_CPU(struct bts_tracer *, tracer);
static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
#define this_tracer per_cpu(tracer, smp_processor_id())
#define this_buffer per_cpu(buffer, smp_processor_id())
static int __read_mostly trace_hw_branches_enabled;
static int trace_hw_branches_enabled __read_mostly;
static int trace_hw_branches_suspended __read_mostly;
static struct trace_array *hw_branch_trace __read_mostly;
/*
* Start tracing on the current cpu.
* The argument is ignored.
*
* pre: bts_tracer_lock must be locked.
*/
static void bts_trace_start_cpu(void *arg)
static void bts_trace_init_cpu(int cpu)
{
if (this_tracer)
ds_release_bts(this_tracer);
per_cpu(tracer, cpu) =
ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL);
this_tracer =
ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
/* ovfl = */ NULL, /* th = */ (size_t)-1,
BTS_KERNEL);
if (IS_ERR(this_tracer)) {
this_tracer = NULL;
return;
if (IS_ERR(per_cpu(tracer, cpu)))
per_cpu(tracer, cpu) = NULL;
}
static int bts_trace_init(struct trace_array *tr)
{
int cpu;
hw_branch_trace = tr;
trace_hw_branches_enabled = 0;
get_online_cpus();
for_each_online_cpu(cpu) {
bts_trace_init_cpu(cpu);
if (likely(per_cpu(tracer, cpu)))
trace_hw_branches_enabled = 1;
}
trace_hw_branches_suspended = 0;
put_online_cpus();
/* If we could not enable tracing on even a single cpu, we fail. */
return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
}
static void bts_trace_reset(struct trace_array *tr)
{
int cpu;
get_online_cpus();
for_each_online_cpu(cpu) {
if (likely(per_cpu(tracer, cpu))) {
ds_release_bts(per_cpu(tracer, cpu));
per_cpu(tracer, cpu) = NULL;
}
}
trace_hw_branches_enabled = 0;
trace_hw_branches_suspended = 0;
put_online_cpus();
}
static void bts_trace_start(struct trace_array *tr)
{
spin_lock(&bts_tracer_lock);
int cpu;
on_each_cpu(bts_trace_start_cpu, NULL, 1);
trace_hw_branches_enabled = 1;
spin_unlock(&bts_tracer_lock);
}
/*
* Stop tracing on the current cpu.
* The argument is ignored.
*
* pre: bts_tracer_lock must be locked.
*/
static void bts_trace_stop_cpu(void *arg)
{
if (this_tracer) {
ds_release_bts(this_tracer);
this_tracer = NULL;
}
get_online_cpus();
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_resume_bts(per_cpu(tracer, cpu));
trace_hw_branches_suspended = 0;
put_online_cpus();
}
static void bts_trace_stop(struct trace_array *tr)
{
spin_lock(&bts_tracer_lock);
int cpu;
trace_hw_branches_enabled = 0;
on_each_cpu(bts_trace_stop_cpu, NULL, 1);
spin_unlock(&bts_tracer_lock);
get_online_cpus();
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_suspend_bts(per_cpu(tracer, cpu));
trace_hw_branches_suspended = 1;
put_online_cpus();
}
static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
spin_lock(&bts_tracer_lock);
if (!trace_hw_branches_enabled)
goto out;
int cpu = (long)hcpu;
switch (action) {
case CPU_ONLINE:
case CPU_DOWN_FAILED:
smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
/* The notification is sent with interrupts enabled. */
if (trace_hw_branches_enabled) {
bts_trace_init_cpu(cpu);
if (trace_hw_branches_suspended &&
likely(per_cpu(tracer, cpu)))
ds_suspend_bts(per_cpu(tracer, cpu));
}
break;
case CPU_DOWN_PREPARE:
smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
break;
/* The notification is sent with interrupts enabled. */
if (likely(per_cpu(tracer, cpu))) {
ds_release_bts(per_cpu(tracer, cpu));
per_cpu(tracer, cpu) = NULL;
}
}
out:
spin_unlock(&bts_tracer_lock);
return NOTIFY_DONE;
}
@@ -126,20 +134,6 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
.notifier_call = bts_hotcpu_handler
};
static int bts_trace_init(struct trace_array *tr)
{
hw_branch_trace = tr;
bts_trace_start(tr);
return 0;
}
static void bts_trace_reset(struct trace_array *tr)
{
bts_trace_stop(tr);
}
static void bts_trace_print_header(struct seq_file *m)
{
seq_puts(m, "# CPU# TO <- FROM\n");
@@ -147,10 +141,10 @@ static void bts_trace_print_header(struct seq_file *m)
static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
{
unsigned long symflags = TRACE_ITER_SYM_OFFSET;
struct trace_entry *entry = iter->ent;
struct trace_seq *seq = &iter->seq;
struct hw_branch_entry *it;
unsigned long symflags = TRACE_ITER_SYM_OFFSET;
trace_assign_type(it, entry);
@@ -168,6 +162,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
void trace_hw_branch(u64 from, u64 to)
{
struct ftrace_event_call *call = &event_hw_branch;
struct trace_array *tr = hw_branch_trace;
struct ring_buffer_event *event;
struct hw_branch_entry *entry;
@@ -194,7 +189,8 @@ void trace_hw_branch(u64 from, u64 to)
entry->ent.type = TRACE_HW_BRANCHES;
entry->from = from;
entry->to = to;
trace_buffer_unlock_commit(tr, event, 0, 0);
if (!filter_check_discard(call, entry, tr->buffer, event))
trace_buffer_unlock_commit(tr, event, 0, 0);
out:
atomic_dec(&tr->data[cpu]->disabled);
@@ -224,11 +220,11 @@ static void trace_bts_at(const struct bts_trace *trace, void *at)
/*
* Collect the trace on the current cpu and write it into the ftrace buffer.
*
* pre: bts_tracer_lock must be locked
* pre: tracing must be suspended on the current cpu
*/
static void trace_bts_cpu(void *arg)
{
struct trace_array *tr = (struct trace_array *) arg;
struct trace_array *tr = (struct trace_array *)arg;
const struct bts_trace *trace;
unsigned char *at;
@@ -241,10 +237,9 @@ static void trace_bts_cpu(void *arg)
if (unlikely(!this_tracer))
return;
ds_suspend_bts(this_tracer);
trace = ds_read_bts(this_tracer);
if (!trace)
goto out;
return;
for (at = trace->ds.top; (void *)at < trace->ds.end;
at += trace->ds.size)
@@ -253,18 +248,27 @@ static void trace_bts_cpu(void *arg)
for (at = trace->ds.begin; (void *)at < trace->ds.top;
at += trace->ds.size)
trace_bts_at(trace, at);
out:
ds_resume_bts(this_tracer);
}
static void trace_bts_prepare(struct trace_iterator *iter)
{
spin_lock(&bts_tracer_lock);
int cpu;
get_online_cpus();
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_suspend_bts(per_cpu(tracer, cpu));
/*
* We need to collect the trace on the respective cpu since ftrace
* implicitly adds the record for the current cpu.
* Once that is more flexible, we could collect the data from any cpu.
*/
on_each_cpu(trace_bts_cpu, iter->tr, 1);
spin_unlock(&bts_tracer_lock);
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_resume_bts(per_cpu(tracer, cpu));
put_online_cpus();
}
static void trace_bts_close(struct trace_iterator *iter)
@@ -274,11 +278,11 @@ static void trace_bts_close(struct trace_iterator *iter)
void trace_hw_branch_oops(void)
{
spin_lock(&bts_tracer_lock);
trace_bts_cpu(hw_branch_trace);
spin_unlock(&bts_tracer_lock);
if (this_tracer) {
ds_suspend_bts_noirq(this_tracer);
trace_bts_cpu(hw_branch_trace);
ds_resume_bts_noirq(this_tracer);
}
}
struct tracer bts_tracer __read_mostly =
@@ -291,7 +295,10 @@ struct tracer bts_tracer __read_mostly =
.start = bts_trace_start,
.stop = bts_trace_stop,
.open = trace_bts_prepare,
.close = trace_bts_close
.close = trace_bts_close,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_hw_branches,
#endif /* CONFIG_FTRACE_SELFTEST */
};
__init static int init_bts_trace(void)

View File

@@ -9,6 +9,8 @@
#include <linux/kernel.h>
#include <linux/mmiotrace.h>
#include <linux/pci.h>
#include <linux/time.h>
#include <asm/atomic.h>
#include "trace.h"
@@ -174,7 +176,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
struct mmiotrace_rw *rw;
struct trace_seq *s = &iter->seq;
unsigned long long t = ns2usecs(iter->ts);
unsigned long usec_rem = do_div(t, 1000000ULL);
unsigned long usec_rem = do_div(t, USEC_PER_SEC);
unsigned secs = (unsigned long)t;
int ret = 1;
@@ -221,7 +223,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
struct mmiotrace_map *m;
struct trace_seq *s = &iter->seq;
unsigned long long t = ns2usecs(iter->ts);
unsigned long usec_rem = do_div(t, 1000000ULL);
unsigned long usec_rem = do_div(t, USEC_PER_SEC);
unsigned secs = (unsigned long)t;
int ret;

View File

@@ -14,11 +14,25 @@
/* must be a power of 2 */
#define EVENT_HASHSIZE 128
static DEFINE_MUTEX(trace_event_mutex);
DECLARE_RWSEM(trace_event_mutex);
DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
static int next_event_type = __TRACE_LAST_TYPE + 1;
void trace_print_seq(struct seq_file *m, struct trace_seq *s)
{
int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
s->buffer[len] = 0;
seq_puts(m, s->buffer);
trace_seq_init(s);
}
enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
@@ -84,6 +98,39 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
return len;
}
EXPORT_SYMBOL_GPL(trace_seq_printf);
/**
* trace_seq_vprintf - sequence printing of trace information
* @s: trace sequence descriptor
* @fmt: printf format string
*
* The tracer may use either sequence operations or its own
 * copy to user routines. To simplify formatting of a trace,
 * trace_seq_vprintf is used to store strings into a special
* buffer (@s). Then the output may be either used by
* the sequencer or pulled into another buffer.
*/
int
trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
{
int len = (PAGE_SIZE - 1) - s->len;
int ret;
if (!len)
return 0;
ret = vsnprintf(s->buffer + s->len, len, fmt, args);
/* If we can't write it all, don't bother writing anything */
if (ret >= len)
return 0;
s->len += ret;
return len;
}
EXPORT_SYMBOL_GPL(trace_seq_vprintf);
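
A minimal usage sketch for trace_seq_vprintf(); the wrapper name is invented and simply forwards its varargs into a trace_seq:

static int demo_seq_note(struct trace_seq *s, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = trace_seq_vprintf(s, fmt, ap);
	va_end(ap);

	return ret;
}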
int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
{
@@ -201,6 +248,67 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
return 0;
}
const char *
ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
unsigned long flags,
const struct trace_print_flags *flag_array)
{
unsigned long mask;
const char *str;
const char *ret = p->buffer + p->len;
int i;
for (i = 0; flag_array[i].name && flags; i++) {
mask = flag_array[i].mask;
if ((flags & mask) != mask)
continue;
str = flag_array[i].name;
flags &= ~mask;
if (p->len && delim)
trace_seq_puts(p, delim);
trace_seq_puts(p, str);
}
/* check for left over flags */
if (flags) {
if (p->len && delim)
trace_seq_puts(p, delim);
trace_seq_printf(p, "0x%lx", flags);
}
trace_seq_putc(p, 0);
return ret;
}
EXPORT_SYMBOL(ftrace_print_flags_seq);
const char *
ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
const struct trace_print_flags *symbol_array)
{
int i;
const char *ret = p->buffer + p->len;
for (i = 0; symbol_array[i].name; i++) {
if (val != symbol_array[i].mask)
continue;
trace_seq_puts(p, symbol_array[i].name);
break;
}
if (!p->len)
trace_seq_printf(p, "0x%lx", val);
trace_seq_putc(p, 0);
return ret;
}
EXPORT_SYMBOL(ftrace_print_symbols_seq);
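
A usage sketch for both helpers; the flag and state tables are invented for illustration. The returned pointer refers into @p and is normally consumed by a later trace_seq_printf("%s", ...):

static const struct trace_print_flags demo_flag_names[] = {
	{ 0x01,	"READ"  },
	{ 0x02,	"WRITE" },
	{ 0x04,	"SYNC"  },
	{ 0,	NULL    },	/* a NULL name terminates the scan */
};

static const struct trace_print_flags demo_state_names[] = {
	{ 1,	"RUNNING" },
	{ 2,	"BLOCKED" },
	{ 0,	NULL	  },
};

/* inside an output callback, with p pointing at the per-cpu ftrace_event_seq: */
/*	ftrace_print_flags_seq(p, "|", entry->flags, demo_flag_names);	*/
/*	ftrace_print_symbols_seq(p, entry->state, demo_state_names);	*/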
#ifdef CONFIG_KRETPROBES
static inline const char *kretprobed(const char *name)
{
@@ -311,17 +419,20 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
if (ip == ULONG_MAX || !ret)
break;
if (i && ret)
ret = trace_seq_puts(s, " <- ");
if (ret)
ret = trace_seq_puts(s, " => ");
if (!ip) {
if (ret)
ret = trace_seq_puts(s, "??");
if (ret)
ret = trace_seq_puts(s, "\n");
continue;
}
if (!ret)
break;
if (ret)
ret = seq_print_user_ip(s, mm, ip, sym_flags);
ret = trace_seq_puts(s, "\n");
}
if (mm)
@@ -455,6 +566,7 @@ static int task_state_char(unsigned long state)
* @type: the type of event to look for
*
* Returns an event of type @type otherwise NULL
* Called with trace_event_read_lock() held.
*/
struct trace_event *ftrace_find_event(int type)
{
@@ -464,7 +576,7 @@ struct trace_event *ftrace_find_event(int type)
key = type & (EVENT_HASHSIZE - 1);
hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
hlist_for_each_entry(event, n, &event_hash[key], node) {
if (event->type == type)
return event;
}
@@ -472,6 +584,46 @@ struct trace_event *ftrace_find_event(int type)
return NULL;
}
static LIST_HEAD(ftrace_event_list);
static int trace_search_list(struct list_head **list)
{
struct trace_event *e;
int last = __TRACE_LAST_TYPE;
if (list_empty(&ftrace_event_list)) {
*list = &ftrace_event_list;
return last + 1;
}
/*
* We used up all possible max events,
 * let's see if somebody freed one.
*/
list_for_each_entry(e, &ftrace_event_list, list) {
if (e->type != last + 1)
break;
last++;
}
/* Did we use up all 65 thousand events? */
if ((last + 1) > FTRACE_MAX_EVENT)
return 0;
*list = &e->list;
return last + 1;
}
void trace_event_read_lock(void)
{
down_read(&trace_event_mutex);
}
void trace_event_read_unlock(void)
{
up_read(&trace_event_mutex);
}
/**
* register_ftrace_event - register output for an event type
* @event: the event type to register
@@ -492,22 +644,42 @@ int register_ftrace_event(struct trace_event *event)
unsigned key;
int ret = 0;
mutex_lock(&trace_event_mutex);
down_write(&trace_event_mutex);
if (!event) {
ret = next_event_type++;
if (WARN_ON(!event))
goto out;
}
if (!event->type)
event->type = next_event_type++;
else if (event->type > __TRACE_LAST_TYPE) {
INIT_LIST_HEAD(&event->list);
if (!event->type) {
struct list_head *list = NULL;
if (next_event_type > FTRACE_MAX_EVENT) {
event->type = trace_search_list(&list);
if (!event->type)
goto out;
} else {
event->type = next_event_type++;
list = &ftrace_event_list;
}
if (WARN_ON(ftrace_find_event(event->type)))
goto out;
list_add_tail(&event->list, list);
} else if (event->type > __TRACE_LAST_TYPE) {
printk(KERN_WARNING "Need to add type to trace.h\n");
WARN_ON(1);
}
if (ftrace_find_event(event->type))
goto out;
} else {
/* Is this event already used */
if (ftrace_find_event(event->type))
goto out;
}
if (event->trace == NULL)
event->trace = trace_nop_print;
@@ -520,14 +692,25 @@ int register_ftrace_event(struct trace_event *event)
key = event->type & (EVENT_HASHSIZE - 1);
hlist_add_head_rcu(&event->node, &event_hash[key]);
hlist_add_head(&event->node, &event_hash[key]);
ret = event->type;
out:
mutex_unlock(&trace_event_mutex);
up_write(&trace_event_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(register_ftrace_event);
/*
* Used by module code with the trace_event_mutex held for write.
*/
int __unregister_ftrace_event(struct trace_event *event)
{
hlist_del(&event->node);
list_del(&event->list);
return 0;
}
/**
* unregister_ftrace_event - remove a no longer used event
@@ -535,12 +718,13 @@ int register_ftrace_event(struct trace_event *event)
*/
int unregister_ftrace_event(struct trace_event *event)
{
mutex_lock(&trace_event_mutex);
hlist_del(&event->node);
mutex_unlock(&trace_event_mutex);
down_write(&trace_event_mutex);
__unregister_ftrace_event(event);
up_write(&trace_event_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(unregister_ftrace_event);
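
A registration sketch under the new dynamic-type scheme; the event and function names are invented for illustration. Leaving .type at zero asks the core to hand out, or recycle, an id, while NULL output callbacks are filled in with trace_nop_print:

static struct trace_event demo_output_event = {
	.type	= 0,	/* 0: let register_ftrace_event() assign or recycle an id */
};

static int demo_register(void)
{
	int id;

	id = register_ftrace_event(&demo_output_event);
	if (!id)
		return -ENODEV;	/* all event ids are in use */
	return 0;
}

/* later: unregister_ftrace_event(&demo_output_event); */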
/*
* Standard events
@@ -833,14 +1017,16 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
trace_assign_type(field, iter->ent);
if (!trace_seq_puts(s, "<stack trace>\n"))
goto partial;
for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
if (i) {
if (!trace_seq_puts(s, " <= "))
goto partial;
if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
break;
if (!trace_seq_puts(s, " => "))
goto partial;
if (!seq_print_ip_sym(s, field->caller[i], flags))
goto partial;
}
if (!seq_print_ip_sym(s, field->caller[i], flags))
goto partial;
if (!trace_seq_puts(s, "\n"))
goto partial;
}
@@ -868,10 +1054,10 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
trace_assign_type(field, iter->ent);
if (!seq_print_userip_objs(field, s, flags))
if (!trace_seq_puts(s, "<user stack trace>\n"))
goto partial;
if (!trace_seq_putc(s, '\n'))
if (!seq_print_userip_objs(field, s, flags))
goto partial;
return TRACE_TYPE_HANDLED;

View File

@@ -1,41 +1,17 @@
#ifndef __TRACE_EVENTS_H
#define __TRACE_EVENTS_H
#include <linux/trace_seq.h>
#include "trace.h"
typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
int flags);
struct trace_event {
struct hlist_node node;
int type;
trace_print_func trace;
trace_print_func raw;
trace_print_func hex;
trace_print_func binary;
};
extern enum print_line_t
trace_print_bprintk_msg_only(struct trace_iterator *iter);
extern enum print_line_t
trace_print_printk_msg_only(struct trace_iterator *iter);
extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
__attribute__ ((format (printf, 2, 3)));
extern int
trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
extern int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
unsigned long sym_flags);
extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
size_t cnt);
extern int trace_seq_puts(struct trace_seq *s, const char *str);
extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
size_t len);
extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
extern int trace_seq_path(struct trace_seq *s, struct path *path);
extern int seq_print_userip_objs(const struct userstack_entry *entry,
struct trace_seq *s, unsigned long sym_flags);
extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
@@ -44,13 +20,17 @@ extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
extern int trace_print_context(struct trace_iterator *iter);
extern int trace_print_lat_context(struct trace_iterator *iter);
extern void trace_event_read_lock(void);
extern void trace_event_read_unlock(void);
extern struct trace_event *ftrace_find_event(int type);
extern int register_ftrace_event(struct trace_event *event);
extern int unregister_ftrace_event(struct trace_event *event);
extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
int flags);
/* used by module unregistering */
extern int __unregister_ftrace_event(struct trace_event *event);
extern struct rw_semaphore trace_event_mutex;
#define MAX_MEMHEX_BYTES 8
#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)

View File

@@ -36,6 +36,7 @@ static void probe_power_start(struct power_trace *it, unsigned int type,
static void probe_power_end(struct power_trace *it)
{
struct ftrace_event_call *call = &event_power;
struct ring_buffer_event *event;
struct trace_power *entry;
struct trace_array_cpu *data;
@@ -54,7 +55,8 @@ static void probe_power_end(struct power_trace *it)
goto out;
entry = ring_buffer_event_data(event);
entry->state_data = *it;
trace_buffer_unlock_commit(tr, event, 0, 0);
if (!filter_check_discard(call, entry, tr->buffer, event))
trace_buffer_unlock_commit(tr, event, 0, 0);
out:
preempt_enable();
}
@@ -62,6 +64,7 @@ static void probe_power_end(struct power_trace *it)
static void probe_power_mark(struct power_trace *it, unsigned int type,
unsigned int level)
{
struct ftrace_event_call *call = &event_power;
struct ring_buffer_event *event;
struct trace_power *entry;
struct trace_array_cpu *data;
@@ -84,7 +87,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
goto out;
entry = ring_buffer_event_data(event);
entry->state_data = *it;
trace_buffer_unlock_commit(tr, event, 0, 0);
if (!filter_check_discard(call, entry, tr->buffer, event))
trace_buffer_unlock_commit(tr, event, 0, 0);
out:
preempt_enable();
}

View File

@@ -245,17 +245,13 @@ static const struct file_operations ftrace_formats_fops = {
static __init int init_trace_printk_function_export(void)
{
struct dentry *d_tracer;
struct dentry *entry;
d_tracer = tracing_init_dentry();
if (!d_tracer)
return 0;
entry = debugfs_create_file("printk_formats", 0444, d_tracer,
trace_create_file("printk_formats", 0444, d_tracer,
NULL, &ftrace_formats_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'printk_formats' entry\n");
return 0;
}
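
For context, trace_create_file() (added elsewhere in this series) is roughly a wrapper of the following shape, folding the repeated warning into one helper; this is a sketch, not the exact body:

struct dentry *trace_create_file(const char *name, mode_t mode,
				 struct dentry *parent, void *data,
				 const struct file_operations *fops)
{
	struct dentry *ret;

	ret = debugfs_create_file(name, mode, parent, data, fops);
	if (!ret)
		pr_warning("Could not create debugfs '%s' entry\n", name);

	return ret;
}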

View File

@@ -10,7 +10,7 @@
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <trace/sched.h>
#include <trace/events/sched.h>
#include "trace.h"
@@ -29,13 +29,13 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
int cpu;
int pc;
if (!sched_ref || sched_stopped)
if (unlikely(!sched_ref))
return;
tracing_record_cmdline(prev);
tracing_record_cmdline(next);
if (!tracer_enabled)
if (!tracer_enabled || sched_stopped)
return;
pc = preempt_count();
@@ -56,15 +56,15 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
unsigned long flags;
int cpu, pc;
if (!likely(tracer_enabled))
if (unlikely(!sched_ref))
return;
tracing_record_cmdline(current);
if (!tracer_enabled || sched_stopped)
return;
pc = preempt_count();
tracing_record_cmdline(current);
if (sched_stopped)
return;
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = ctx_trace->data[cpu];

View File

@@ -15,7 +15,7 @@
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <trace/sched.h>
#include <trace/events/sched.h>
#include "trace.h"
@@ -138,9 +138,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
pc = preempt_count();
/* The task we are waiting for is waking up */
data = wakeup_trace->data[wakeup_cpu];
/* disable local data, not wakeup_cpu data */
cpu = raw_smp_processor_id();
disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
@@ -154,6 +151,9 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;
/* The task we are waiting for is waking up */
data = wakeup_trace->data[wakeup_cpu];
trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);

View File

@@ -16,6 +16,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_BRANCH:
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
case TRACE_HW_BRANCHES:
return 1;
}
return 0;
@@ -188,6 +189,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
#else
# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
#endif /* CONFIG_DYNAMIC_FTRACE */
/*
* Simple verification test of ftrace function tracer.
* Enable ftrace, sleep 1/10 second, and then read the trace
@@ -749,3 +751,59 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
return ret;
}
#endif /* CONFIG_BRANCH_TRACER */
#ifdef CONFIG_HW_BRANCH_TRACER
int
trace_selftest_startup_hw_branches(struct tracer *trace,
struct trace_array *tr)
{
struct trace_iterator *iter;
struct tracer tracer;
unsigned long count;
int ret;
if (!trace->open) {
printk(KERN_CONT "missing open function...");
return -1;
}
ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
return ret;
}
/*
* The hw-branch tracer needs to collect the trace from the various
* cpu trace buffers - before tracing is stopped.
*/
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return -ENOMEM;
memcpy(&tracer, trace, sizeof(tracer));
iter->trace = &tracer;
iter->tr = tr;
iter->pos = -1;
mutex_init(&iter->mutex);
trace->open(iter);
mutex_destroy(&iter->mutex);
kfree(iter);
tracing_stop();
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
tracing_start();
if (!ret && !count) {
printk(KERN_CONT "no entries found..");
ret = -1;
}
return ret;
}
#endif /* CONFIG_HW_BRANCH_TRACER */

View File

@@ -265,7 +265,7 @@ static int t_show(struct seq_file *m, void *v)
seq_printf(m, " Depth Size Location"
" (%d entries)\n"
" ----- ---- --------\n",
max_stack_trace.nr_entries);
max_stack_trace.nr_entries - 1);
if (!stack_tracer_enabled && !max_stack_size)
print_disabled(m);
@@ -352,19 +352,14 @@ __setup("stacktrace", enable_stacktrace);
static __init int stack_trace_init(void)
{
struct dentry *d_tracer;
struct dentry *entry;
d_tracer = tracing_init_dentry();
entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
&max_stack_size, &stack_max_size_fops);
if (!entry)
pr_warning("Could not create debugfs 'stack_max_size' entry\n");
trace_create_file("stack_max_size", 0644, d_tracer,
&max_stack_size, &stack_max_size_fops);
entry = debugfs_create_file("stack_trace", 0444, d_tracer,
NULL, &stack_trace_fops);
if (!entry)
pr_warning("Could not create debugfs 'stack_trace' entry\n");
trace_create_file("stack_trace", 0444, d_tracer,
NULL, &stack_trace_fops);
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);

View File

@@ -1,7 +1,7 @@
/*
* Infrastructure for statistic tracing (histogram output).
*
* Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
* Copyright (C) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
*
* Based on the code from trace_branch.c which is
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
@@ -10,22 +10,27 @@
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/debugfs.h>
#include "trace_stat.h"
#include "trace.h"
/* List of stat entries from a tracer */
struct trace_stat_list {
struct list_head list;
/*
* List of stat red-black nodes from a tracer
 * We use such a tree to quickly sort the stat
* entries from the tracer.
*/
struct stat_node {
struct rb_node node;
void *stat;
};
/* A stat session is the stats output in one file */
struct tracer_stat_session {
struct stat_session {
struct list_head session_list;
struct tracer_stat *ts;
struct list_head stat_list;
struct rb_root stat_root;
struct mutex stat_mutex;
struct dentry *file;
};
@@ -37,18 +42,48 @@ static DEFINE_MUTEX(all_stat_sessions_mutex);
/* The root directory for all stat files */
static struct dentry *stat_dir;
static void reset_stat_session(struct tracer_stat_session *session)
/*
* Iterate through the rbtree using a post order traversal path
* to release the next node.
 * It won't necessarily release one at each iteration,
* but it will at least advance closer to the next one
* to be released.
*/
static struct rb_node *release_next(struct rb_node *node)
{
struct trace_stat_list *node, *next;
struct stat_node *snode;
struct rb_node *parent = rb_parent(node);
list_for_each_entry_safe(node, next, &session->stat_list, list)
kfree(node);
if (node->rb_left)
return node->rb_left;
else if (node->rb_right)
return node->rb_right;
else {
if (!parent)
;
else if (parent->rb_left == node)
parent->rb_left = NULL;
else
parent->rb_right = NULL;
INIT_LIST_HEAD(&session->stat_list);
snode = container_of(node, struct stat_node, node);
kfree(snode);
return parent;
}
}
static void destroy_session(struct tracer_stat_session *session)
static void reset_stat_session(struct stat_session *session)
{
struct rb_node *node = session->stat_root.rb_node;
while (node)
node = release_next(node);
session->stat_root = RB_ROOT;
}
static void destroy_session(struct stat_session *session)
{
debugfs_remove(session->file);
reset_stat_session(session);
@@ -56,25 +91,60 @@ static void destroy_session(struct tracer_stat_session *session)
kfree(session);
}
/*
* For tracers that don't provide a stat_cmp callback.
* This one will force an immediate insertion on tail of
* the list.
*/
static int dummy_cmp(void *p1, void *p2)
typedef int (*cmp_stat_t)(void *, void *);
static int insert_stat(struct rb_root *root, void *stat, cmp_stat_t cmp)
{
return 1;
struct rb_node **new = &(root->rb_node), *parent = NULL;
struct stat_node *data;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->stat = stat;
/*
* Figure out where to put new node
* This is a descendent sorting
*/
while (*new) {
struct stat_node *this;
int result;
this = container_of(*new, struct stat_node, node);
result = cmp(data->stat, this->stat);
parent = *new;
if (result >= 0)
new = &((*new)->rb_left);
else
new = &((*new)->rb_right);
}
rb_link_node(&data->node, parent, new);
rb_insert_color(&data->node, root);
return 0;
}
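
A stat_cmp sketch consistent with this insertion rule; the stat type is invented for illustration. Returning a non-negative value sends the new entry to the left, so larger values come out first from rb_first():

struct demo_stat {
	unsigned long	hits;
};

static int demo_stat_cmp(void *p1, void *p2)
{
	struct demo_stat *a = p1, *b = p2;

	if (a->hits > b->hits)
		return 1;
	if (a->hits < b->hits)
		return -1;
	return 0;
}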
/*
* Initialize the stat list at each trace_stat file opening.
* For tracers that don't provide a stat_cmp callback.
* This one will force an insertion as right-most node
* in the rbtree.
*/
static int dummy_cmp(void *p1, void *p2)
{
return -1;
}
/*
* Initialize the stat rbtree at each trace_stat file opening.
 * All of this copying and sorting is required on every open,
* since the stats could have changed between two file sessions.
*/
static int stat_seq_init(struct tracer_stat_session *session)
static int stat_seq_init(struct stat_session *session)
{
struct trace_stat_list *iter_entry, *new_entry;
struct tracer_stat *ts = session->ts;
struct rb_root *root = &session->stat_root;
void *stat;
int ret = 0;
int i;
@@ -85,29 +155,16 @@ static int stat_seq_init(struct tracer_stat_session *session)
if (!ts->stat_cmp)
ts->stat_cmp = dummy_cmp;
stat = ts->stat_start();
stat = ts->stat_start(ts);
if (!stat)
goto exit;
/*
* The first entry. Actually this is the second, but the first
* one (the stat_list head) is pointless.
*/
new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
if (!new_entry) {
ret = -ENOMEM;
ret = insert_stat(root, stat, ts->stat_cmp);
if (ret)
goto exit;
}
INIT_LIST_HEAD(&new_entry->list);
list_add(&new_entry->list, &session->stat_list);
new_entry->stat = stat;
/*
* Iterate over the tracer stat entries and store them in a sorted
* list.
* Iterate over the tracer stat entries and store them in an rbtree.
*/
for (i = 1; ; i++) {
stat = ts->stat_next(stat, i);
@@ -116,36 +173,16 @@ static int stat_seq_init(struct tracer_stat_session *session)
if (!stat)
break;
new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
if (!new_entry) {
ret = -ENOMEM;
goto exit_free_list;
}
INIT_LIST_HEAD(&new_entry->list);
new_entry->stat = stat;
list_for_each_entry_reverse(iter_entry, &session->stat_list,
list) {
/* Insertion with a descendent sorting */
if (ts->stat_cmp(iter_entry->stat,
new_entry->stat) >= 0) {
list_add(&new_entry->list, &iter_entry->list);
break;
}
}
/* The current larger value */
if (list_empty(&new_entry->list))
list_add(&new_entry->list, &session->stat_list);
ret = insert_stat(root, stat, ts->stat_cmp);
if (ret)
goto exit_free_rbtree;
}
exit:
mutex_unlock(&session->stat_mutex);
return ret;
exit_free_list:
exit_free_rbtree:
reset_stat_session(session);
mutex_unlock(&session->stat_mutex);
return ret;
@@ -154,38 +191,51 @@ exit_free_list:
static void *stat_seq_start(struct seq_file *s, loff_t *pos)
{
struct tracer_stat_session *session = s->private;
struct stat_session *session = s->private;
struct rb_node *node;
int i;
/* Prevent from tracer switch or stat_list modification */
/* Prevent from tracer switch or rbtree modification */
mutex_lock(&session->stat_mutex);
/* If we are in the beginning of the file, print the headers */
if (!*pos && session->ts->stat_headers)
if (!*pos && session->ts->stat_headers) {
(*pos)++;
return SEQ_START_TOKEN;
}
return seq_list_start(&session->stat_list, *pos);
node = rb_first(&session->stat_root);
for (i = 0; node && i < *pos; i++)
node = rb_next(node);
(*pos)++;
return node;
}
static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
{
struct tracer_stat_session *session = s->private;
struct stat_session *session = s->private;
struct rb_node *node = p;
(*pos)++;
if (p == SEQ_START_TOKEN)
return seq_list_start(&session->stat_list, *pos);
return rb_first(&session->stat_root);
return seq_list_next(p, &session->stat_list, pos);
return rb_next(node);
}
static void stat_seq_stop(struct seq_file *s, void *p)
{
struct tracer_stat_session *session = s->private;
struct stat_session *session = s->private;
mutex_unlock(&session->stat_mutex);
}
static int stat_seq_show(struct seq_file *s, void *v)
{
struct tracer_stat_session *session = s->private;
struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
struct stat_session *session = s->private;
struct stat_node *l = container_of(v, struct stat_node, node);
if (v == SEQ_START_TOKEN)
return session->ts->stat_headers(s);
@@ -205,7 +255,7 @@ static int tracing_stat_open(struct inode *inode, struct file *file)
{
int ret;
struct tracer_stat_session *session = inode->i_private;
struct stat_session *session = inode->i_private;
ret = seq_open(file, &trace_stat_seq_ops);
if (!ret) {
@@ -218,11 +268,11 @@ static int tracing_stat_open(struct inode *inode, struct file *file)
}
/*
* Avoid consuming memory with our now useless list.
* Avoid consuming memory with our now useless rbtree.
*/
static int tracing_stat_release(struct inode *i, struct file *f)
{
struct tracer_stat_session *session = i->i_private;
struct stat_session *session = i->i_private;
mutex_lock(&session->stat_mutex);
reset_stat_session(session);
@@ -251,7 +301,7 @@ static int tracing_stat_init(void)
return 0;
}
static int init_stat_file(struct tracer_stat_session *session)
static int init_stat_file(struct stat_session *session)
{
if (!stat_dir && tracing_stat_init())
return -ENODEV;
@@ -266,7 +316,7 @@ static int init_stat_file(struct tracer_stat_session *session)
int register_stat_tracer(struct tracer_stat *trace)
{
struct tracer_stat_session *session, *node, *tmp;
struct stat_session *session, *node;
int ret;
if (!trace)
@@ -277,7 +327,7 @@ int register_stat_tracer(struct tracer_stat *trace)
/* Already registered? */
mutex_lock(&all_stat_sessions_mutex);
list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
list_for_each_entry(node, &all_stat_sessions, session_list) {
if (node->ts == trace) {
mutex_unlock(&all_stat_sessions_mutex);
return -EINVAL;
@@ -286,15 +336,13 @@ int register_stat_tracer(struct tracer_stat *trace)
mutex_unlock(&all_stat_sessions_mutex);
/* Init the session */
session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
session = kzalloc(sizeof(*session), GFP_KERNEL);
if (!session)
return -ENOMEM;
session->ts = trace;
INIT_LIST_HEAD(&session->session_list);
INIT_LIST_HEAD(&session->stat_list);
mutex_init(&session->stat_mutex);
session->file = NULL;
ret = init_stat_file(session);
if (ret) {
@@ -312,7 +360,7 @@ int register_stat_tracer(struct tracer_stat *trace)
void unregister_stat_tracer(struct tracer_stat *trace)
{
struct tracer_stat_session *node, *tmp;
struct stat_session *node, *tmp;
mutex_lock(&all_stat_sessions_mutex);
list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {

View File

@@ -12,7 +12,7 @@ struct tracer_stat {
/* The name of your stat file */
const char *name;
/* Iteration over statistic entries */
void *(*stat_start)(void);
void *(*stat_start)(struct tracer_stat *trace);
void *(*stat_next)(void *prev, int idx);
/* Compare two entries for stats sorting */
int (*stat_cmp)(void *p1, void *p2);
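
A minimal tracer_stat sketch under the new stat_start() signature; all names are invented for illustration. The extra argument lets a single callback serve several registered instances:

static void *demo_stat_start(struct tracer_stat *trace)
{
	/* return the first statistic entry, or NULL if there is none */
	return NULL;
}

static void *demo_stat_next(void *prev, int idx)
{
	/* return the entry following prev, or NULL at the end */
	return NULL;
}

static int demo_stat_show(struct seq_file *s, void *p)
{
	/* print one entry into the seq_file */
	return 0;
}

static struct tracer_stat demo_stats __read_mostly = {
	.name		= "demo",
	.stat_start	= demo_stat_start,
	.stat_next	= demo_stat_next,
	.stat_show	= demo_stat_show,
};

/* register_stat_tracer(&demo_stats); */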

View File

@@ -321,11 +321,7 @@ static const struct file_operations sysprof_sample_fops = {
void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
{
struct dentry *entry;
entry = debugfs_create_file("sysprof_sample_period", 0644,
trace_create_file("sysprof_sample_period", 0644,
d_tracer, NULL, &sysprof_sample_fops);
if (entry)
return;
pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
}

View File

@@ -6,7 +6,7 @@
*/
#include <trace/workqueue.h>
#include <trace/events/workqueue.h>
#include <linux/list.h>
#include <linux/percpu.h>
#include "trace_stat.h"
@@ -16,8 +16,6 @@
/* A cpu workqueue thread */
struct cpu_workqueue_stats {
struct list_head list;
/* Useful to know if we print the cpu headers */
bool first_entry;
int cpu;
pid_t pid;
/* Can be inserted from interrupt or user context, need to be atomic */
@@ -47,12 +45,11 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
struct work_struct *work)
{
int cpu = cpumask_first(&wq_thread->cpus_allowed);
struct cpu_workqueue_stats *node, *next;
struct cpu_workqueue_stats *node;
unsigned long flags;
spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
list) {
list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
if (node->pid == wq_thread->pid) {
atomic_inc(&node->inserted);
goto found;
@@ -69,12 +66,11 @@ probe_workqueue_execution(struct task_struct *wq_thread,
struct work_struct *work)
{
int cpu = cpumask_first(&wq_thread->cpus_allowed);
struct cpu_workqueue_stats *node, *next;
struct cpu_workqueue_stats *node;
unsigned long flags;
spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
list) {
list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
if (node->pid == wq_thread->pid) {
node->executed++;
goto found;
@@ -105,8 +101,6 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
cws->pid = wq_thread->pid;
spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
if (list_empty(&workqueue_cpu_stat(cpu)->list))
cws->first_entry = true;
list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
}
@@ -152,7 +146,7 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
return ret;
}
static void *workqueue_stat_start(void)
static void *workqueue_stat_start(struct tracer_stat *trace)
{
int cpu;
void *ret = NULL;
@@ -191,16 +185,9 @@ static void *workqueue_stat_next(void *prev, int idx)
static int workqueue_stat_show(struct seq_file *s, void *p)
{
struct cpu_workqueue_stats *cws = p;
unsigned long flags;
int cpu = cws->cpu;
struct pid *pid;
struct task_struct *tsk;
spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
seq_printf(s, "\n");
spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
pid = find_get_pid(cws->pid);
if (pid) {
tsk = get_pid_task(pid, PIDTYPE_PID);

View File

@@ -154,7 +154,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
if (!list_empty(&wait->task_list))
list_del_init(&wait->task_list);
else if (waitqueue_active(q))
__wake_up_common(q, mode, 1, 0, key);
__wake_up_locked_key(q, mode, key);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(abort_exclusive_wait);

View File

@@ -33,7 +33,8 @@
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <trace/workqueue.h>
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>
/*
* The per-CPU workqueue (if single thread, we always use the first
@@ -124,8 +125,6 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
}
DEFINE_TRACE(workqueue_insertion);
static void insert_work(struct cpu_workqueue_struct *cwq,
struct work_struct *work, struct list_head *head)
{
@@ -262,8 +261,6 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
DEFINE_TRACE(workqueue_execution);
static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
spin_lock_irq(&cwq->lock);
@@ -753,8 +750,6 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
return cwq;
}
DEFINE_TRACE(workqueue_creation);
static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -860,8 +855,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
}
EXPORT_SYMBOL_GPL(__create_workqueue_key);
DEFINE_TRACE(workqueue_destruction);
static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
{
/*