Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (470 commits)
  x86: Fix comments of register/stack access functions
  perf tools: Replace %m with %a in sscanf
  hw-breakpoints: Keep track of user disabled breakpoints
  tracing/syscalls: Make syscall events print callbacks static
  tracing: Add DEFINE_EVENT(), DEFINE_SINGLE_EVENT() support to docbook
  perf: Don't free perf_mmap_data until work has been done
  perf_event: Fix compile error
  perf tools: Fix _GNU_SOURCE macro related strndup() build error
  trace_syscalls: Remove unused syscall_name_to_nr()
  trace_syscalls: Simplify syscall profile
  trace_syscalls: Remove duplicate init_enter_##sname()
  trace_syscalls: Add syscall_nr field to struct syscall_metadata
  trace_syscalls: Remove enter_id exit_id
  trace_syscalls: Set event_enter_##sname->data to its metadata
  trace_syscalls: Remove unused event_syscall_enter and event_syscall_exit
  perf_event: Initialize data.period in perf_swevent_hrtimer()
  perf probe: Simplify event naming
  perf probe: Add --list option for listing current probe events
  perf probe: Add argv_split() from lib/argv_split.c
  perf probe: Move probe event utility functions to probe-event.c
  ...
Committed by Linus Torvalds on 2009-12-05 15:30:21 -08:00
251 changed files with 21200 additions and 6210 deletions

kernel/trace/Kconfig

@@ -339,6 +339,27 @@ config POWER_TRACER
power management decisions, specifically the C-state and P-state
behavior.
config KSYM_TRACER
bool "Trace read and write access on kernel memory locations"
depends on HAVE_HW_BREAKPOINT
select TRACING
help
This tracer helps find read and write operations on any given kernel
symbol, i.e. one listed in /proc/kallsyms.
config PROFILE_KSYM_TRACER
bool "Profile all kernel memory accesses on 'watched' variables"
depends on KSYM_TRACER
help
This tracer profiles kernel accesses on variables watched through the
ksym tracer ftrace plugin. Depending upon the hardware, all read
and write operations on kernel variables can be monitored for
accesses.
The results will be displayed in:
/debugfs/tracing/profile_ksym
Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
@@ -428,6 +449,23 @@ config BLK_DEV_IO_TRACE
If unsure, say N.
config KPROBE_EVENT
depends on KPROBES
depends on X86
bool "Enable kprobes-based dynamic events"
select TRACING
default y
help
This allows the user to add tracing events (similar to tracepoints) on the fly
via the ftrace interface. See Documentation/trace/kprobetrace.txt
for more details.
Those events can be inserted wherever kprobes can probe, and record
various register and memory values.
This option is also required by perf-probe subcommand of perf tools. If
you want to use perf tools, this option is strongly recommended.
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
depends on FUNCTION_TRACER
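
The ksym tracer described above is driven from userspace through a debugfs
filter file (created in trace_ksym.c further down). A minimal sketch, assuming
debugfs is mounted at /debugfs as the help text implies; the symbol name is
illustrative:

#include <stdio.h>

int main(void)
{
	/* format parsed by parse_ksym_trace_str(): <ksym_name>:<op>,
	 * where <op> is one of rw-, -w-, --- */
	FILE *f = fopen("/debugfs/tracing/ksym_trace_filter", "w");
	if (!f)
		return 1;
	fprintf(f, "pid_max:rw-\n");	/* break on reads and writes of pid_max */
	fclose(f);
	return 0;
}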

kernel/trace/Makefile

@@ -53,6 +53,8 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
obj-$(CONFIG_EVENT_TRACING) += power-traces.o
libftrace-y := ftrace.o

kernel/trace/ring_buffer.c

@@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s)
int ret;
ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
"offset:0;\tsize:%u;\n",
(unsigned int)sizeof(field.time_stamp));
"offset:0;\tsize:%u;\tsigned:%u;\n",
(unsigned int)sizeof(field.time_stamp),
(unsigned int)is_signed_type(u64));
ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
"offset:%u;\tsize:%u;\n",
"offset:%u;\tsize:%u;\tsigned:%u;\n",
(unsigned int)offsetof(typeof(field), commit),
(unsigned int)sizeof(field.commit));
(unsigned int)sizeof(field.commit),
(unsigned int)is_signed_type(long));
ret = trace_seq_printf(s, "\tfield: char data;\t"
"offset:%u;\tsize:%u;\n",
"offset:%u;\tsize:%u;\tsigned:%u;\n",
(unsigned int)offsetof(typeof(field), data),
(unsigned int)BUF_PAGE_SIZE);
(unsigned int)BUF_PAGE_SIZE,
(unsigned int)is_signed_type(char));
return ret;
}
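
The new signed:%u column is computed with is_signed_type(). A one-line
compile-time sketch of such a macro (the actual definition lives in the
tracing headers):

/* evaluates to 1 when a cast of -1 compares below zero, i.e. the type is signed */
#define is_signed_type(type)	(((type)(-1)) < 0)

With this, is_signed_type(u64) yields 0 while is_signed_type(long) yields 1,
matching the values printed above.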

kernel/trace/trace.h

@@ -11,6 +11,7 @@
#include <linux/ftrace.h>
#include <trace/boot.h>
#include <linux/kmemtrace.h>
#include <linux/hw_breakpoint.h>
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
TRACE_KMEM_ALLOC,
TRACE_KMEM_FREE,
TRACE_BLK,
TRACE_KSYM,
__TRACE_LAST_TYPE,
};
@@ -98,9 +100,32 @@ struct syscall_trace_enter {
struct syscall_trace_exit {
struct trace_entry ent;
int nr;
unsigned long ret;
long ret;
};
struct kprobe_trace_entry {
struct trace_entry ent;
unsigned long ip;
int nargs;
unsigned long args[];
};
#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
(offsetof(struct kprobe_trace_entry, args) + \
(sizeof(unsigned long) * (n)))
struct kretprobe_trace_entry {
struct trace_entry ent;
unsigned long func;
unsigned long ret_ip;
int nargs;
unsigned long args[];
};
#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
(offsetof(struct kretprobe_trace_entry, args) + \
(sizeof(unsigned long) * (n)))
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
@@ -209,6 +234,7 @@ extern void __ftrace_bad_type(void);
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
TRACE_KMEM_FREE); \
IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
__ftrace_bad_type(); \
} while (0)
@@ -364,6 +390,8 @@ int register_tracer(struct tracer *type);
void unregister_tracer(struct tracer *type);
int is_tracing_stopped(void);
extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -438,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_hw_branches(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_ksym(struct tracer *trace,
struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
@@ -683,7 +713,6 @@ struct event_filter {
int n_preds;
struct filter_pred **preds;
char *filter_string;
bool no_reset;
};
struct event_subsystem {
@@ -703,7 +732,7 @@ typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
typedef int (*regex_match_func)(char *str, struct regex *r, int len);
enum regex_type {
MATCH_FULL,
MATCH_FULL = 0,
MATCH_FRONT_ONLY,
MATCH_MIDDLE_ONLY,
MATCH_END_ONLY,
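filter_parse_regex() maps a glob pattern onto one of these match types based
on where the '*' wildcards sit; roughly (an assumed summary of its behavior):

/*
 * "glob"    -> MATCH_FULL          (compare the whole string)
 * "glob*"   -> MATCH_FRONT_ONLY    (prefix match)
 * "*glob*"  -> MATCH_MIDDLE_ONLY   (substring match)
 * "*glob"   -> MATCH_END_ONLY      (suffix match)
 */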
@@ -744,7 +773,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
struct ring_buffer *buffer,
struct ring_buffer_event *event)
{
if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
if (unlikely(call->filter_active) &&
!filter_match_preds(call->filter, rec)) {
ring_buffer_discard_commit(buffer, event);
return 1;
}

kernel/trace/trace_entries.h

@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
F_printk("type:%u call_site:%lx ptr:%p",
__entry->type_id, __entry->call_site, __entry->ptr)
);
FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
TRACE_KSYM,
F_STRUCT(
__field( unsigned long, ip )
__field( unsigned char, type )
__array( char , cmd, TASK_COMM_LEN )
__field( unsigned long, addr )
),
F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
(void *)__entry->ip, (unsigned int)__entry->type,
(void *)__entry->addr, __entry->cmd)
);

kernel/trace/trace_event_profile.c

@@ -8,17 +8,14 @@
#include <linux/module.h>
#include "trace.h"
/*
* We can't use a size but a type in alloc_percpu()
* So let's create a dummy type that matches the desired size
*/
typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
char *trace_profile_buf;
EXPORT_SYMBOL_GPL(trace_profile_buf);
char *perf_trace_buf;
EXPORT_SYMBOL_GPL(perf_trace_buf);
char *trace_profile_buf_nmi;
EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
char *perf_trace_buf_nmi;
EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t;
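
The typedef exists because alloc_percpu() takes a type, not a byte count; a
kernel-side sketch of how the buffer is then allocated and reached per CPU,
mirroring the code below:

perf_trace_t *buf = alloc_percpu(perf_trace_t);	/* one buffer per CPU */
char *raw_data = (char *)per_cpu_ptr(buf, smp_processor_id());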
/* Count the events in use (per event id, not per instance) */
static int total_profile_count;
@@ -32,20 +29,20 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
return 0;
if (!total_profile_count) {
buf = (char *)alloc_percpu(profile_buf_t);
buf = (char *)alloc_percpu(perf_trace_t);
if (!buf)
goto fail_buf;
rcu_assign_pointer(trace_profile_buf, buf);
rcu_assign_pointer(perf_trace_buf, buf);
buf = (char *)alloc_percpu(profile_buf_t);
buf = (char *)alloc_percpu(perf_trace_t);
if (!buf)
goto fail_buf_nmi;
rcu_assign_pointer(trace_profile_buf_nmi, buf);
rcu_assign_pointer(perf_trace_buf_nmi, buf);
}
ret = event->profile_enable();
ret = event->profile_enable(event);
if (!ret) {
total_profile_count++;
return 0;
@@ -53,10 +50,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
fail_buf_nmi:
if (!total_profile_count) {
free_percpu(trace_profile_buf_nmi);
free_percpu(trace_profile_buf);
trace_profile_buf_nmi = NULL;
trace_profile_buf = NULL;
free_percpu(perf_trace_buf_nmi);
free_percpu(perf_trace_buf);
perf_trace_buf_nmi = NULL;
perf_trace_buf = NULL;
}
fail_buf:
atomic_dec(&event->profile_count);
@@ -89,14 +86,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
if (!atomic_add_negative(-1, &event->profile_count))
return;
event->profile_disable();
event->profile_disable(event);
if (!--total_profile_count) {
buf = trace_profile_buf;
rcu_assign_pointer(trace_profile_buf, NULL);
buf = perf_trace_buf;
rcu_assign_pointer(perf_trace_buf, NULL);
nmi_buf = trace_profile_buf_nmi;
rcu_assign_pointer(trace_profile_buf_nmi, NULL);
nmi_buf = perf_trace_buf_nmi;
rcu_assign_pointer(perf_trace_buf_nmi, NULL);
/*
* Ensure all events in profiling have finished before

kernel/trace/trace_events.c

@@ -93,9 +93,7 @@ int trace_define_common_fields(struct ftrace_event_call *call)
}
EXPORT_SYMBOL_GPL(trace_define_common_fields);
#ifdef CONFIG_MODULES
static void trace_destroy_fields(struct ftrace_event_call *call)
void trace_destroy_fields(struct ftrace_event_call *call)
{
struct ftrace_event_field *field, *next;
@@ -107,8 +105,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
}
}
#endif /* CONFIG_MODULES */
static void ftrace_event_enable_disable(struct ftrace_event_call *call,
int enable)
{
@@ -117,14 +113,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
if (call->enabled) {
call->enabled = 0;
tracing_stop_cmdline_record();
call->unregfunc(call->data);
call->unregfunc(call);
}
break;
case 1:
if (!call->enabled) {
call->enabled = 1;
tracing_start_cmdline_record();
call->regfunc(call->data);
call->regfunc(call);
}
break;
}
@@ -507,7 +503,7 @@ extern char *__bad_type_size(void);
#define FIELD(type, name) \
sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
#type, "common_" #name, offsetof(typeof(field), name), \
sizeof(field.name)
sizeof(field.name), is_signed_type(type)
static int trace_write_header(struct trace_seq *s)
{
@@ -515,17 +511,17 @@ static int trace_write_header(struct trace_seq *s)
/* struct trace_entry */
return trace_seq_printf(s,
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
"\n",
FIELD(unsigned short, type),
FIELD(unsigned char, flags),
FIELD(unsigned char, preempt_count),
FIELD(int, pid),
FIELD(int, lock_depth));
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
"\n",
FIELD(unsigned short, type),
FIELD(unsigned char, flags),
FIELD(unsigned char, preempt_count),
FIELD(int, pid),
FIELD(int, lock_depth));
}
static ssize_t
@@ -937,27 +933,46 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
return 0;
}
#define for_each_event(event, start, end) \
for (event = start; \
(unsigned long)event < (unsigned long)end; \
event++)
static int __trace_add_event_call(struct ftrace_event_call *call)
{
struct dentry *d_events;
int ret;
#ifdef CONFIG_MODULES
if (!call->name)
return -EINVAL;
static LIST_HEAD(ftrace_module_file_list);
if (call->raw_init) {
ret = call->raw_init(call);
if (ret < 0) {
if (ret != -ENOSYS)
pr_warning("Could not initialize trace "
"events/%s\n", call->name);
return ret;
}
}
/*
* Modules must own their file_operations to keep up with
* reference counting.
*/
struct ftrace_module_file_ops {
struct list_head list;
struct module *mod;
struct file_operations id;
struct file_operations enable;
struct file_operations format;
struct file_operations filter;
};
d_events = event_trace_events_dir();
if (!d_events)
return -ENOENT;
ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
&ftrace_enable_fops, &ftrace_event_filter_fops,
&ftrace_event_format_fops);
if (!ret)
list_add(&call->list, &ftrace_events);
return ret;
}
/* Add an additional event_call dynamically */
int trace_add_event_call(struct ftrace_event_call *call)
{
int ret;
mutex_lock(&event_mutex);
ret = __trace_add_event_call(call);
mutex_unlock(&event_mutex);
return ret;
}
static void remove_subsystem_dir(const char *name)
{
@@ -985,6 +1000,53 @@ static void remove_subsystem_dir(const char *name)
}
}
/*
* Must be called under locking both of event_mutex and trace_event_mutex.
*/
static void __trace_remove_event_call(struct ftrace_event_call *call)
{
ftrace_event_enable_disable(call, 0);
if (call->event)
__unregister_ftrace_event(call->event);
debugfs_remove_recursive(call->dir);
list_del(&call->list);
trace_destroy_fields(call);
destroy_preds(call);
remove_subsystem_dir(call->system);
}
/* Remove an event_call */
void trace_remove_event_call(struct ftrace_event_call *call)
{
mutex_lock(&event_mutex);
down_write(&trace_event_mutex);
__trace_remove_event_call(call);
up_write(&trace_event_mutex);
mutex_unlock(&event_mutex);
}
#define for_each_event(event, start, end) \
for (event = start; \
(unsigned long)event < (unsigned long)end; \
event++)
#ifdef CONFIG_MODULES
static LIST_HEAD(ftrace_module_file_list);
/*
* Modules must own their file_operations to keep up with
* reference counting.
*/
struct ftrace_module_file_ops {
struct list_head list;
struct module *mod;
struct file_operations id;
struct file_operations enable;
struct file_operations format;
struct file_operations filter;
};
static struct ftrace_module_file_ops *
trace_create_file_ops(struct module *mod)
{
@@ -1042,7 +1104,7 @@ static void trace_module_add_events(struct module *mod)
if (!call->name)
continue;
if (call->raw_init) {
ret = call->raw_init();
ret = call->raw_init(call);
if (ret < 0) {
if (ret != -ENOSYS)
pr_warning("Could not initialize trace "
@@ -1060,10 +1122,11 @@ static void trace_module_add_events(struct module *mod)
return;
}
call->mod = mod;
list_add(&call->list, &ftrace_events);
event_create_dir(call, d_events,
&file_ops->id, &file_ops->enable,
&file_ops->filter, &file_ops->format);
ret = event_create_dir(call, d_events,
&file_ops->id, &file_ops->enable,
&file_ops->filter, &file_ops->format);
if (!ret)
list_add(&call->list, &ftrace_events);
}
}
@@ -1077,14 +1140,7 @@ static void trace_module_remove_events(struct module *mod)
list_for_each_entry_safe(call, p, &ftrace_events, list) {
if (call->mod == mod) {
found = true;
ftrace_event_enable_disable(call, 0);
if (call->event)
__unregister_ftrace_event(call->event);
debugfs_remove_recursive(call->dir);
list_del(&call->list);
trace_destroy_fields(call);
destroy_preds(call);
remove_subsystem_dir(call->system);
__trace_remove_event_call(call);
}
}
@@ -1202,7 +1258,7 @@ static __init int event_trace_init(void)
if (!call->name)
continue;
if (call->raw_init) {
ret = call->raw_init();
ret = call->raw_init(call);
if (ret < 0) {
if (ret != -ENOSYS)
pr_warning("Could not initialize trace "
@@ -1210,10 +1266,12 @@ static __init int event_trace_init(void)
continue;
}
}
list_add(&call->list, &ftrace_events);
event_create_dir(call, d_events, &ftrace_event_id_fops,
&ftrace_enable_fops, &ftrace_event_filter_fops,
&ftrace_event_format_fops);
ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
&ftrace_enable_fops,
&ftrace_event_filter_fops,
&ftrace_event_format_fops);
if (!ret)
list_add(&call->list, &ftrace_events);
}
while (true) {

kernel/trace/trace_events_filter.c

@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/perf_event.h>
#include "trace.h"
#include "trace_output.h"
@@ -29,6 +30,7 @@ enum filter_op_ids
{
OP_OR,
OP_AND,
OP_GLOB,
OP_NE,
OP_EQ,
OP_LT,
@@ -46,16 +48,17 @@ struct filter_op {
};
static struct filter_op filter_ops[] = {
{ OP_OR, "||", 1 },
{ OP_AND, "&&", 2 },
{ OP_NE, "!=", 4 },
{ OP_EQ, "==", 4 },
{ OP_LT, "<", 5 },
{ OP_LE, "<=", 5 },
{ OP_GT, ">", 5 },
{ OP_GE, ">=", 5 },
{ OP_NONE, "OP_NONE", 0 },
{ OP_OPEN_PAREN, "(", 0 },
{ OP_OR, "||", 1 },
{ OP_AND, "&&", 2 },
{ OP_GLOB, "~", 4 },
{ OP_NE, "!=", 4 },
{ OP_EQ, "==", 4 },
{ OP_LT, "<", 5 },
{ OP_LE, "<=", 5 },
{ OP_GT, ">", 5 },
{ OP_GE, ">=", 5 },
{ OP_NONE, "OP_NONE", 0 },
{ OP_OPEN_PAREN, "(", 0 },
};
enum {
@@ -329,22 +332,18 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
return type;
}
static int filter_build_regex(struct filter_pred *pred)
static void filter_build_regex(struct filter_pred *pred)
{
struct regex *r = &pred->regex;
char *search, *dup;
enum regex_type type;
int not;
char *search;
enum regex_type type = MATCH_FULL;
int not = 0;
type = filter_parse_regex(r->pattern, r->len, &search, &not);
dup = kstrdup(search, GFP_KERNEL);
if (!dup)
return -ENOMEM;
strcpy(r->pattern, dup);
kfree(dup);
r->len = strlen(r->pattern);
if (pred->op == OP_GLOB) {
type = filter_parse_regex(r->pattern, r->len, &search, &not);
r->len = strlen(search);
memmove(r->pattern, search, r->len+1);
}
switch (type) {
case MATCH_FULL:
@@ -362,14 +361,11 @@ static int filter_build_regex(struct filter_pred *pred)
}
pred->not ^= not;
return 0;
}
/* return 1 if event matches, 0 otherwise (discard) */
int filter_match_preds(struct ftrace_event_call *call, void *rec)
int filter_match_preds(struct event_filter *filter, void *rec)
{
struct event_filter *filter = call->filter;
int match, top = 0, val1 = 0, val2 = 0;
int stack[MAX_FILTER_PRED];
struct filter_pred *pred;
@@ -542,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
filter->preds[i]->fn = filter_pred_none;
}
void destroy_preds(struct ftrace_event_call *call)
static void __free_preds(struct event_filter *filter)
{
struct event_filter *filter = call->filter;
int i;
if (!filter)
@@ -557,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call)
kfree(filter->preds);
kfree(filter->filter_string);
kfree(filter);
call->filter = NULL;
}
static int init_preds(struct ftrace_event_call *call)
void destroy_preds(struct ftrace_event_call *call)
{
__free_preds(call->filter);
call->filter = NULL;
call->filter_active = 0;
}
static struct event_filter *__alloc_preds(void)
{
struct event_filter *filter;
struct filter_pred *pred;
int i;
if (call->filter)
return 0;
filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
if (!call->filter)
return -ENOMEM;
filter = kzalloc(sizeof(*filter), GFP_KERNEL);
if (!filter)
return ERR_PTR(-ENOMEM);
filter->n_preds = 0;
@@ -587,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call)
filter->preds[i] = pred;
}
return 0;
return filter;
oom:
destroy_preds(call);
__free_preds(filter);
return ERR_PTR(-ENOMEM);
}
return -ENOMEM;
static int init_preds(struct ftrace_event_call *call)
{
if (call->filter)
return 0;
call->filter_active = 0;
call->filter = __alloc_preds();
if (IS_ERR(call->filter))
return PTR_ERR(call->filter);
return 0;
}
static int init_subsystem_preds(struct event_subsystem *system)
@@ -615,14 +625,7 @@ static int init_subsystem_preds(struct event_subsystem *system)
return 0;
}
enum {
FILTER_DISABLE_ALL,
FILTER_INIT_NO_RESET,
FILTER_SKIP_NO_RESET,
};
static void filter_free_subsystem_preds(struct event_subsystem *system,
int flag)
static void filter_free_subsystem_preds(struct event_subsystem *system)
{
struct ftrace_event_call *call;
@@ -633,14 +636,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
if (strcmp(call->system, system->name) != 0)
continue;
if (flag == FILTER_INIT_NO_RESET) {
call->filter->no_reset = false;
continue;
}
if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
continue;
filter_disable_preds(call);
remove_filter_string(call->filter);
}
@@ -648,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
static int filter_add_pred_fn(struct filter_parse_state *ps,
struct ftrace_event_call *call,
struct event_filter *filter,
struct filter_pred *pred,
filter_pred_fn_t fn)
{
struct event_filter *filter = call->filter;
int idx, err;
if (filter->n_preds == MAX_FILTER_PRED) {
@@ -666,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
return err;
filter->n_preds++;
call->filter_active = 1;
return 0;
}
@@ -691,7 +685,10 @@ static bool is_string_field(struct ftrace_event_field *field)
static int is_legal_op(struct ftrace_event_field *field, int op)
{
if (is_string_field(field) && (op != OP_EQ && op != OP_NE))
if (is_string_field(field) &&
(op != OP_EQ && op != OP_NE && op != OP_GLOB))
return 0;
if (!is_string_field(field) && op == OP_GLOB)
return 0;
return 1;
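
Since OP_GLOB is only legal on string fields, a filter expression can now glob
against an event's string field from userspace; a hedged sketch (the event,
field, and pattern here are illustrative):

#include <stdio.h>

int main(void)
{
	/* '~' applies the glob; only string fields accept it */
	FILE *f = fopen("/debugfs/tracing/events/irq/irq_handler_entry/filter", "w");
	if (!f)
		return 1;
	fprintf(f, "name ~ \"eth*\"\n");
	fclose(f);
	return 0;
}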
@@ -742,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
static int filter_add_pred(struct filter_parse_state *ps,
struct ftrace_event_call *call,
struct event_filter *filter,
struct filter_pred *pred,
bool dry_run)
{
@@ -776,15 +774,13 @@ static int filter_add_pred(struct filter_parse_state *ps,
}
if (is_string_field(field)) {
ret = filter_build_regex(pred);
if (ret)
return ret;
filter_build_regex(pred);
if (field->filter_type == FILTER_STATIC_STRING) {
fn = filter_pred_string;
pred->regex.field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING)
fn = filter_pred_strloc;
fn = filter_pred_strloc;
else {
fn = filter_pred_pchar;
pred->regex.field_len = strlen(pred->regex.pattern);
@@ -813,45 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
add_pred_fn:
if (!dry_run)
return filter_add_pred_fn(ps, call, pred, fn);
return 0;
}
static int filter_add_subsystem_pred(struct filter_parse_state *ps,
struct event_subsystem *system,
struct filter_pred *pred,
char *filter_string,
bool dry_run)
{
struct ftrace_event_call *call;
int err = 0;
bool fail = true;
list_for_each_entry(call, &ftrace_events, list) {
if (!call->define_fields)
continue;
if (strcmp(call->system, system->name))
continue;
if (call->filter->no_reset)
continue;
err = filter_add_pred(ps, call, pred, dry_run);
if (err)
call->filter->no_reset = true;
else
fail = false;
if (!dry_run)
replace_filter_string(call->filter, filter_string);
}
if (fail) {
parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
return err;
}
return filter_add_pred_fn(ps, call, filter, pred, fn);
return 0;
}
@@ -1209,8 +1167,8 @@ static int check_preds(struct filter_parse_state *ps)
return 0;
}
static int replace_preds(struct event_subsystem *system,
struct ftrace_event_call *call,
static int replace_preds(struct ftrace_event_call *call,
struct event_filter *filter,
struct filter_parse_state *ps,
char *filter_string,
bool dry_run)
@@ -1257,11 +1215,7 @@ static int replace_preds(struct event_subsystem *system,
add_pred:
if (!pred)
return -ENOMEM;
if (call)
err = filter_add_pred(ps, call, pred, false);
else
err = filter_add_subsystem_pred(ps, system, pred,
filter_string, dry_run);
err = filter_add_pred(ps, call, filter, pred, dry_run);
filter_free_pred(pred);
if (err)
return err;
@@ -1272,10 +1226,50 @@ add_pred:
return 0;
}
static int replace_system_preds(struct event_subsystem *system,
struct filter_parse_state *ps,
char *filter_string)
{
struct ftrace_event_call *call;
bool fail = true;
int err;
list_for_each_entry(call, &ftrace_events, list) {
struct event_filter *filter = call->filter;
if (!call->define_fields)
continue;
if (strcmp(call->system, system->name) != 0)
continue;
/* try to see if the filter can be applied */
err = replace_preds(call, filter, ps, filter_string, true);
if (err)
continue;
/* really apply the filter */
filter_disable_preds(call);
err = replace_preds(call, filter, ps, filter_string, false);
if (err)
filter_disable_preds(call);
else {
call->filter_active = 1;
replace_filter_string(filter, filter_string);
}
fail = false;
}
if (fail) {
parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
return -EINVAL;
}
return 0;
}
int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
{
int err;
struct filter_parse_state *ps;
mutex_lock(&event_mutex);
@@ -1287,8 +1281,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
if (!strcmp(strstrip(filter_string), "0")) {
filter_disable_preds(call);
remove_filter_string(call->filter);
mutex_unlock(&event_mutex);
return 0;
goto out_unlock;
}
err = -ENOMEM;
@@ -1306,10 +1299,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
goto out;
}
err = replace_preds(NULL, call, ps, filter_string, false);
err = replace_preds(call, call->filter, ps, filter_string, false);
if (err)
append_filter_err(ps, call->filter);
else
call->filter_active = 1;
out:
filter_opstack_clear(ps);
postfix_clear(ps);
@@ -1324,7 +1318,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
char *filter_string)
{
int err;
struct filter_parse_state *ps;
mutex_lock(&event_mutex);
@@ -1334,10 +1327,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
goto out_unlock;
if (!strcmp(strstrip(filter_string), "0")) {
filter_free_subsystem_preds(system, FILTER_DISABLE_ALL);
filter_free_subsystem_preds(system);
remove_filter_string(system->filter);
mutex_unlock(&event_mutex);
return 0;
goto out_unlock;
}
err = -ENOMEM;
@@ -1354,23 +1346,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
goto out;
}
filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET);
/* try to see the filter can be applied to which events */
err = replace_preds(system, NULL, ps, filter_string, true);
if (err) {
err = replace_system_preds(system, ps, filter_string);
if (err)
append_filter_err(ps, system->filter);
goto out;
}
filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET);
/* really apply the filter to the events */
err = replace_preds(system, NULL, ps, filter_string, false);
if (err) {
append_filter_err(ps, system->filter);
filter_free_subsystem_preds(system, 2);
}
out:
filter_opstack_clear(ps);
@@ -1382,3 +1360,73 @@ out_unlock:
return err;
}
#ifdef CONFIG_EVENT_PROFILE
void ftrace_profile_free_filter(struct perf_event *event)
{
struct event_filter *filter = event->filter;
event->filter = NULL;
__free_preds(filter);
}
int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str)
{
int err;
struct event_filter *filter;
struct filter_parse_state *ps;
struct ftrace_event_call *call = NULL;
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
if (call->id == event_id)
break;
}
err = -EINVAL;
if (!call)
goto out_unlock;
err = -EEXIST;
if (event->filter)
goto out_unlock;
filter = __alloc_preds();
if (IS_ERR(filter)) {
err = PTR_ERR(filter);
goto out_unlock;
}
err = -ENOMEM;
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
goto free_preds;
parse_init(ps, filter_ops, filter_str);
err = filter_parse(ps);
if (err)
goto free_ps;
err = replace_preds(call, filter, ps, filter_str, false);
if (!err)
event->filter = filter;
free_ps:
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
free_preds:
if (err)
__free_preds(filter);
out_unlock:
mutex_unlock(&event_mutex);
return err;
}
#endif /* CONFIG_EVENT_PROFILE */
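
ftrace_profile_set_filter() is reached from the perf syscall; a userspace
sketch, assuming a tracepoint event fd from perf_event_open() and the
PERF_EVENT_IOC_SET_FILTER ioctl added in this series:

#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* attach an event filter string to an open tracepoint perf event */
static int set_event_filter(int fd, const char *expr)
{
	return ioctl(fd, PERF_EVENT_IOC_SET_FILTER, expr);
}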

kernel/trace/trace_export.c

@@ -66,44 +66,47 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef __field
#define __field(type, item) \
ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
"offset:%zu;\tsize:%zu;\n", \
"offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
offsetof(typeof(field), item), \
sizeof(field.item)); \
sizeof(field.item), is_signed_type(type)); \
if (!ret) \
return 0;
#undef __field_desc
#define __field_desc(type, container, item) \
ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
"offset:%zu;\tsize:%zu;\n", \
"offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
offsetof(typeof(field), container.item), \
sizeof(field.container.item)); \
sizeof(field.container.item), \
is_signed_type(type)); \
if (!ret) \
return 0;
#undef __array
#define __array(type, item, len) \
ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
"offset:%zu;\tsize:%zu;\n", \
offsetof(typeof(field), item), \
sizeof(field.item)); \
"offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
offsetof(typeof(field), item), \
sizeof(field.item), is_signed_type(type)); \
if (!ret) \
return 0;
#undef __array_desc
#define __array_desc(type, container, item, len) \
ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
"offset:%zu;\tsize:%zu;\n", \
"offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
offsetof(typeof(field), container.item), \
sizeof(field.container.item)); \
sizeof(field.container.item), \
is_signed_type(type)); \
if (!ret) \
return 0;
#undef __dynamic_array
#define __dynamic_array(type, item) \
ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
"offset:%zu;\tsize:0;\n", \
offsetof(typeof(field), item)); \
"offset:%zu;\tsize:0;\tsigned:%u;\n", \
offsetof(typeof(field), item), \
is_signed_type(type)); \
if (!ret) \
return 0;
@@ -131,7 +134,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
#include "trace_entries.h"
#undef __field
#define __field(type, item) \
ret = trace_define_field(event_call, #type, #item, \
@@ -193,6 +195,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
#include "trace_entries.h"
static int ftrace_raw_init_event(struct ftrace_event_call *call)
{
INIT_LIST_HEAD(&call->fields);
return 0;
}
#undef __field
#define __field(type, item)
@@ -211,7 +218,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
static int ftrace_raw_init_event_##call(void); \
\
struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
@@ -219,14 +225,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
.name = #call, \
.id = type, \
.system = __stringify(TRACE_SYSTEM), \
.raw_init = ftrace_raw_init_event_##call, \
.raw_init = ftrace_raw_init_event, \
.show_format = ftrace_format_##call, \
.define_fields = ftrace_define_fields_##call, \
}; \
static int ftrace_raw_init_event_##call(void) \
{ \
INIT_LIST_HEAD(&event_##call.fields); \
return 0; \
} \
#include "trace_entries.h"

kernel/trace/trace_kprobe.c (new file, 1523 lines; diff omitted because it is too large)
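
Since the trace_kprobe.c diff is suppressed, here is a hedged sketch of the
interface it implements, following Documentation/trace/kprobetrace.txt (the
probe point and event name are illustrative):

#include <stdio.h>

int main(void)
{
	/* "p:EVENT SYMBOL" defines an entry probe; "r:" would probe the return */
	FILE *f = fopen("/debugfs/tracing/kprobe_events", "w");
	if (!f)
		return 1;
	fprintf(f, "p:myprobe do_sys_open\n");
	fclose(f);
	return 0;
}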

kernel/trace/trace_ksym.c (new file, 550 lines)

@@ -0,0 +1,550 @@
/*
* trace_ksym.c - Kernel Symbol Tracer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2009
*/
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/debugfs.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/fs.h>
#include "trace_output.h"
#include "trace_stat.h"
#include "trace.h"
#include <linux/hw_breakpoint.h>
#include <asm/hw_breakpoint.h>
/*
* For now, let us restrict the no. of symbols traced simultaneously to number
* of available hardware breakpoint registers.
*/
#define KSYM_TRACER_MAX HBP_NUM
#define KSYM_TRACER_OP_LEN 3 /* rw- */
struct trace_ksym {
struct perf_event **ksym_hbp;
struct perf_event_attr attr;
#ifdef CONFIG_PROFILE_KSYM_TRACER
unsigned long counter;
#endif
struct hlist_node ksym_hlist;
};
static struct trace_array *ksym_trace_array;
static unsigned int ksym_filter_entry_count;
static unsigned int ksym_tracing_enabled;
static HLIST_HEAD(ksym_filter_head);
static DEFINE_MUTEX(ksym_tracer_mutex);
#ifdef CONFIG_PROFILE_KSYM_TRACER
#define MAX_UL_INT 0xffffffff
void ksym_collect_stats(unsigned long hbp_hit_addr)
{
struct hlist_node *node;
struct trace_ksym *entry;
rcu_read_lock();
hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
if ((entry->attr.bp_addr == hbp_hit_addr) &&
(entry->counter <= MAX_UL_INT)) {
entry->counter++;
break;
}
}
rcu_read_unlock();
}
#endif /* CONFIG_PROFILE_KSYM_TRACER */
void ksym_hbp_handler(struct perf_event *hbp, void *data)
{
struct ring_buffer_event *event;
struct ksym_trace_entry *entry;
struct pt_regs *regs = data;
struct ring_buffer *buffer;
int pc;
if (!ksym_tracing_enabled)
return;
buffer = ksym_trace_array->buffer;
pc = preempt_count();
event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
sizeof(*entry), 0, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
entry->ip = instruction_pointer(regs);
entry->type = hw_breakpoint_type(hbp);
entry->addr = hw_breakpoint_addr(hbp);
strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
#ifdef CONFIG_PROFILE_KSYM_TRACER
ksym_collect_stats(hw_breakpoint_addr(hbp));
#endif /* CONFIG_PROFILE_KSYM_TRACER */
trace_buffer_unlock_commit(buffer, event, 0, pc);
}
/* Valid access types are represented as
*
* rw- : Set Read/Write Access Breakpoint
* -w- : Set Write Access Breakpoint
* --- : Clear Breakpoints
* --x : Set Execution Break points (Not available yet)
*
*/
static int ksym_trace_get_access_type(char *str)
{
int access = 0;
if (str[0] == 'r')
access |= HW_BREAKPOINT_R;
if (str[1] == 'w')
access |= HW_BREAKPOINT_W;
if (str[2] == 'x')
access |= HW_BREAKPOINT_X;
switch (access) {
case HW_BREAKPOINT_R:
case HW_BREAKPOINT_W:
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
return access;
default:
return -EINVAL;
}
}
/*
* There can be several possible malformed requests and we attempt to capture
* all of them. We enumerate some of the rules
* 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
* i.e. multiple ':' symbols disallowed. Possible uses are of the form
* <module>:<ksym_name>:<op>.
* 2. No delimiter symbol ':' in the input string
* 3. Spurious operator symbols or symbols not in their respective positions
* 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
* 5. Kernel symbol not a part of /proc/kallsyms
* 6. Duplicate requests
*/
static int parse_ksym_trace_str(char *input_string, char **ksymname,
unsigned long *addr)
{
int ret;
*ksymname = strsep(&input_string, ":");
*addr = kallsyms_lookup_name(*ksymname);
/* Check for malformed request: (2), (1) and (5) */
if ((!input_string) ||
(strlen(input_string) != KSYM_TRACER_OP_LEN) ||
(*addr == 0))
return -EINVAL;
ret = ksym_trace_get_access_type(input_string);
return ret;
}
int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
{
struct trace_ksym *entry;
int ret = -ENOMEM;
if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
" new requests for tracing can be accepted now.\n",
KSYM_TRACER_MAX);
return -ENOSPC;
}
entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
if (!entry)
return -ENOMEM;
hw_breakpoint_init(&entry->attr);
entry->attr.bp_type = op;
entry->attr.bp_addr = addr;
entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
ret = -EAGAIN;
entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
ksym_hbp_handler);
if (IS_ERR(entry->ksym_hbp)) {
ret = PTR_ERR(entry->ksym_hbp);
printk(KERN_INFO "ksym_tracer request failed. Try again"
" later!!\n");
goto err;
}
hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
ksym_filter_entry_count++;
return 0;
err:
kfree(entry);
return ret;
}
static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
struct trace_ksym *entry;
struct hlist_node *node;
struct trace_seq *s;
ssize_t cnt = 0;
int ret;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
trace_seq_init(s);
mutex_lock(&ksym_tracer_mutex);
hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr);
if (entry->attr.bp_type == HW_BREAKPOINT_R)
ret = trace_seq_puts(s, "r--\n");
else if (entry->attr.bp_type == HW_BREAKPOINT_W)
ret = trace_seq_puts(s, "-w-\n");
else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
ret = trace_seq_puts(s, "rw-\n");
WARN_ON_ONCE(!ret);
}
cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
mutex_unlock(&ksym_tracer_mutex);
kfree(s);
return cnt;
}
static void __ksym_trace_reset(void)
{
struct trace_ksym *entry;
struct hlist_node *node, *node1;
mutex_lock(&ksym_tracer_mutex);
hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
ksym_hlist) {
unregister_wide_hw_breakpoint(entry->ksym_hbp);
ksym_filter_entry_count--;
hlist_del_rcu(&(entry->ksym_hlist));
synchronize_rcu();
kfree(entry);
}
mutex_unlock(&ksym_tracer_mutex);
}
static ssize_t ksym_trace_filter_write(struct file *file,
const char __user *buffer,
size_t count, loff_t *ppos)
{
struct trace_ksym *entry;
struct hlist_node *node;
char *input_string, *ksymname = NULL;
unsigned long ksym_addr = 0;
int ret, op, changed = 0;
input_string = kzalloc(count + 1, GFP_KERNEL);
if (!input_string)
return -ENOMEM;
if (copy_from_user(input_string, buffer, count)) {
kfree(input_string);
return -EFAULT;
}
input_string[count] = '\0';
strstrip(input_string);
/*
* Clear all breakpoints if:
* 1: echo > ksym_trace_filter
* 2: echo 0 > ksym_trace_filter
* 3: echo "*:---" > ksym_trace_filter
*/
if (!input_string[0] || !strcmp(input_string, "0") ||
!strcmp(input_string, "*:---")) {
__ksym_trace_reset();
kfree(input_string);
return count;
}
ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
if (ret < 0) {
kfree(input_string);
return ret;
}
mutex_lock(&ksym_tracer_mutex);
ret = -EINVAL;
hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
if (entry->attr.bp_addr == ksym_addr) {
/* Check for malformed request: (6) */
if (entry->attr.bp_type != op)
changed = 1;
else
goto out;
break;
}
}
if (changed) {
unregister_wide_hw_breakpoint(entry->ksym_hbp);
entry->attr.bp_type = op;
ret = 0;
if (op > 0) {
entry->ksym_hbp =
register_wide_hw_breakpoint(&entry->attr,
ksym_hbp_handler);
if (IS_ERR(entry->ksym_hbp))
ret = PTR_ERR(entry->ksym_hbp);
else
goto out;
}
/* Error or "symbol:---" case: drop it */
ksym_filter_entry_count--;
hlist_del_rcu(&(entry->ksym_hlist));
synchronize_rcu();
kfree(entry);
goto out;
} else {
/* Check for malformed request: (4) */
if (op == 0)
goto out;
ret = process_new_ksym_entry(ksymname, op, ksym_addr);
}
out:
mutex_unlock(&ksym_tracer_mutex);
kfree(input_string);
if (!ret)
ret = count;
return ret;
}
static const struct file_operations ksym_tracing_fops = {
.open = tracing_open_generic,
.read = ksym_trace_filter_read,
.write = ksym_trace_filter_write,
};
static void ksym_trace_reset(struct trace_array *tr)
{
ksym_tracing_enabled = 0;
__ksym_trace_reset();
}
static int ksym_trace_init(struct trace_array *tr)
{
int cpu, ret = 0;
for_each_online_cpu(cpu)
tracing_reset(tr, cpu);
ksym_tracing_enabled = 1;
ksym_trace_array = tr;
return ret;
}
static void ksym_trace_print_header(struct seq_file *m)
{
seq_puts(m,
"# TASK-PID CPU# Symbol "
"Type Function\n");
seq_puts(m,
"# | | | "
" | |\n");
}
static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
struct trace_seq *s = &iter->seq;
struct ksym_trace_entry *field;
char str[KSYM_SYMBOL_LEN];
int ret;
if (entry->type != TRACE_KSYM)
return TRACE_TYPE_UNHANDLED;
trace_assign_type(field, entry);
ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
entry->pid, iter->cpu, (char *)field->addr);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
switch (field->type) {
case HW_BREAKPOINT_R:
ret = trace_seq_printf(s, " R ");
break;
case HW_BREAKPOINT_W:
ret = trace_seq_printf(s, " W ");
break;
case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
ret = trace_seq_printf(s, " RW ");
break;
default:
return TRACE_TYPE_PARTIAL_LINE;
}
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
sprint_symbol(str, field->ip);
ret = trace_seq_printf(s, "%s\n", str);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
struct tracer ksym_tracer __read_mostly =
{
.name = "ksym_tracer",
.init = ksym_trace_init,
.reset = ksym_trace_reset,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_ksym,
#endif
.print_header = ksym_trace_print_header,
.print_line = ksym_trace_output
};
__init static int init_ksym_trace(void)
{
struct dentry *d_tracer;
struct dentry *entry;
d_tracer = tracing_init_dentry();
ksym_filter_entry_count = 0;
entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
NULL, &ksym_tracing_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'ksym_trace_filter' file\n");
return register_tracer(&ksym_tracer);
}
device_initcall(init_ksym_trace);
#ifdef CONFIG_PROFILE_KSYM_TRACER
static int ksym_tracer_stat_headers(struct seq_file *m)
{
seq_puts(m, " Access Type ");
seq_puts(m, " Symbol Counter\n");
seq_puts(m, " ----------- ");
seq_puts(m, " ------ -------\n");
return 0;
}
static int ksym_tracer_stat_show(struct seq_file *m, void *v)
{
struct hlist_node *stat = v;
struct trace_ksym *entry;
int access_type = 0;
char fn_name[KSYM_NAME_LEN];
entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
access_type = entry->attr.bp_type;
switch (access_type) {
case HW_BREAKPOINT_R:
seq_puts(m, " R ");
break;
case HW_BREAKPOINT_W:
seq_puts(m, " W ");
break;
case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
seq_puts(m, " RW ");
break;
default:
seq_puts(m, " NA ");
}
if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
seq_printf(m, " %-36s", fn_name);
else
seq_printf(m, " %-36s", "<NA>");
seq_printf(m, " %15lu\n", entry->counter);
return 0;
}
static void *ksym_tracer_stat_start(struct tracer_stat *trace)
{
return ksym_filter_head.first;
}
static void *
ksym_tracer_stat_next(void *v, int idx)
{
struct hlist_node *stat = v;
return stat->next;
}
static struct tracer_stat ksym_tracer_stats = {
.name = "ksym_tracer",
.stat_start = ksym_tracer_stat_start,
.stat_next = ksym_tracer_stat_next,
.stat_headers = ksym_tracer_stat_headers,
.stat_show = ksym_tracer_stat_show
};
__init static int ksym_tracer_stat_init(void)
{
int ret;
ret = register_stat_tracer(&ksym_tracer_stats);
if (ret) {
printk(KERN_WARNING "Warning: could not register "
"ksym tracer stats\n");
return 1;
}
return 0;
}
fs_initcall(ksym_tracer_stat_init);
#endif /* CONFIG_PROFILE_KSYM_TRACER */

kernel/trace/trace_selftest.c

@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
case TRACE_HW_BRANCHES:
case TRACE_KSYM:
return 1;
}
return 0;
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
return ret;
}
#endif /* CONFIG_HW_BRANCH_TRACER */
#ifdef CONFIG_KSYM_TRACER
static int ksym_selftest_dummy;
int
trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
{
unsigned long count;
int ret;
/* start the tracing */
ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
return ret;
}
ksym_selftest_dummy = 0;
/* Register the read-write tracing request */
ret = process_new_ksym_entry("ksym_selftest_dummy",
HW_BREAKPOINT_R | HW_BREAKPOINT_W,
(unsigned long)(&ksym_selftest_dummy));
if (ret < 0) {
printk(KERN_CONT "ksym_trace read-write startup test failed\n");
goto ret_path;
}
/* Perform a read and a write operation over the dummy variable to
* trigger the tracer
*/
if (ksym_selftest_dummy == 0)
ksym_selftest_dummy++;
/* stop the tracing. */
tracing_stop();
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
tracing_start();
/* read & write operations - one each is performed on the dummy variable
* triggering two entries in the trace buffer
*/
if (!ret && count != 2) {
printk(KERN_CONT "Ksym tracer startup test failed");
ret = -1;
}
ret_path:
return ret;
}
#endif /* CONFIG_KSYM_TRACER */

kernel/trace/trace_syscalls.c

@@ -51,32 +51,6 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
return syscalls_metadata[nr];
}
int syscall_name_to_nr(char *name)
{
int i;
if (!syscalls_metadata)
return -1;
for (i = 0; i < NR_syscalls; i++) {
if (syscalls_metadata[i]) {
if (!strcmp(syscalls_metadata[i]->name, name))
return i;
}
}
return -1;
}
void set_syscall_enter_id(int num, int id)
{
syscalls_metadata[num]->enter_id = id;
}
void set_syscall_exit_id(int num, int id)
{
syscalls_metadata[num]->exit_id = id;
}
enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
@@ -93,7 +67,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
if (!entry)
goto end;
if (entry->enter_id != ent->type) {
if (entry->enter_event->id != ent->type) {
WARN_ON_ONCE(1);
goto end;
}
@@ -148,7 +122,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
return TRACE_TYPE_HANDLED;
}
if (entry->exit_id != ent->type) {
if (entry->exit_event->id != ent->type) {
WARN_ON_ONCE(1);
return TRACE_TYPE_UNHANDLED;
}
@@ -166,24 +140,19 @@ extern char *__bad_type_size(void);
#define SYSCALL_FIELD(type, name) \
sizeof(type) != sizeof(trace.name) ? \
__bad_type_size() : \
#type, #name, offsetof(typeof(trace), name), sizeof(trace.name)
#type, #name, offsetof(typeof(trace), name), \
sizeof(trace.name), is_signed_type(type)
int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
{
int i;
int nr;
int ret;
struct syscall_metadata *entry;
struct syscall_metadata *entry = call->data;
struct syscall_trace_enter trace;
int offset = offsetof(struct syscall_trace_enter, args);
nr = syscall_name_to_nr(call->data);
entry = syscall_nr_to_meta(nr);
if (!entry)
return 0;
ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
"\tsigned:%u;\n",
SYSCALL_FIELD(int, nr));
if (!ret)
return 0;
@@ -193,8 +162,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
entry->args[i]);
if (!ret)
return 0;
ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
sizeof(unsigned long));
ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
"\tsigned:%u;\n", offset,
sizeof(unsigned long),
is_signed_type(unsigned long));
if (!ret)
return 0;
offset += sizeof(unsigned long);
@@ -226,8 +197,10 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
struct syscall_trace_exit trace;
ret = trace_seq_printf(s,
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
"\tsigned:%u;\n"
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
"\tsigned:%u;\n",
SYSCALL_FIELD(int, nr),
SYSCALL_FIELD(long, ret));
if (!ret)
@@ -239,22 +212,19 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
int syscall_enter_define_fields(struct ftrace_event_call *call)
{
struct syscall_trace_enter trace;
struct syscall_metadata *meta;
struct syscall_metadata *meta = call->data;
int ret;
int nr;
int i;
int offset = offsetof(typeof(trace), args);
nr = syscall_name_to_nr(call->data);
meta = syscall_nr_to_meta(nr);
if (!meta)
return 0;
ret = trace_define_common_fields(call);
if (ret)
return ret;
ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
if (ret)
return ret;
for (i = 0; i < meta->nb_args; i++) {
ret = trace_define_field(call, meta->types[i],
meta->args[i], offset,
@@ -275,7 +245,11 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
if (ret)
return ret;
ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0,
ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
if (ret)
return ret;
ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
FILTER_OTHER);
return ret;
@@ -302,8 +276,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
size, 0, 0);
event = trace_current_buffer_lock_reserve(&buffer,
sys_data->enter_event->id, size, 0, 0);
if (!event)
return;
@@ -334,8 +308,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
if (!sys_data)
return;
event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
sizeof(*entry), 0, 0);
event = trace_current_buffer_lock_reserve(&buffer,
sys_data->exit_event->id, sizeof(*entry), 0, 0);
if (!event)
return;
@@ -348,14 +322,12 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
int reg_event_syscall_enter(void *ptr)
int reg_event_syscall_enter(struct ftrace_event_call *call)
{
int ret = 0;
int num;
char *name;
name = (char *)ptr;
num = syscall_name_to_nr(name);
num = ((struct syscall_metadata *)call->data)->syscall_nr;
if (num < 0 || num >= NR_syscalls)
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
@@ -372,13 +344,11 @@ int reg_event_syscall_enter(void *ptr)
return ret;
}
void unreg_event_syscall_enter(void *ptr)
void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
int num;
char *name;
name = (char *)ptr;
num = syscall_name_to_nr(name);
num = ((struct syscall_metadata *)call->data)->syscall_nr;
if (num < 0 || num >= NR_syscalls)
return;
mutex_lock(&syscall_trace_lock);
@@ -389,14 +359,12 @@ void unreg_event_syscall_enter(void *ptr)
mutex_unlock(&syscall_trace_lock);
}
int reg_event_syscall_exit(void *ptr)
int reg_event_syscall_exit(struct ftrace_event_call *call)
{
int ret = 0;
int num;
char *name;
name = (char *)ptr;
num = syscall_name_to_nr(name);
num = ((struct syscall_metadata *)call->data)->syscall_nr;
if (num < 0 || num >= NR_syscalls)
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
@@ -413,13 +381,11 @@ int reg_event_syscall_exit(void *ptr)
return ret;
}
void unreg_event_syscall_exit(void *ptr)
void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
int num;
char *name;
name = (char *)ptr;
num = syscall_name_to_nr(name);
num = ((struct syscall_metadata *)call->data)->syscall_nr;
if (num < 0 || num >= NR_syscalls)
return;
mutex_lock(&syscall_trace_lock);
@@ -430,13 +396,17 @@ void unreg_event_syscall_exit(void *ptr)
mutex_unlock(&syscall_trace_lock);
}
struct trace_event event_syscall_enter = {
.trace = print_syscall_enter,
};
int init_syscall_trace(struct ftrace_event_call *call)
{
int id;
struct trace_event event_syscall_exit = {
.trace = print_syscall_exit,
};
id = register_ftrace_event(call->event);
if (!id)
return -ENODEV;
call->id = id;
INIT_LIST_HEAD(&call->fields);
return 0;
}
int __init init_ftrace_syscalls(void)
{
@@ -454,6 +424,10 @@ int __init init_ftrace_syscalls(void)
for (i = 0; i < NR_syscalls; i++) {
addr = arch_syscall_addr(i);
meta = find_syscall_meta(addr);
if (!meta)
continue;
meta->syscall_nr = i;
syscalls_metadata[i] = meta;
}
@@ -473,8 +447,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
unsigned long flags;
char *trace_buf;
char *raw_data;
int syscall_nr;
int rctx;
int size;
int cpu;
@@ -498,41 +474,42 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(flags);
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
cpu = smp_processor_id();
if (in_nmi())
raw_data = rcu_dereference(trace_profile_buf_nmi);
else
raw_data = rcu_dereference(trace_profile_buf);
trace_buf = rcu_dereference(perf_trace_buf);
if (!raw_data)
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(raw_data, cpu);
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
rec = (struct syscall_trace_enter *) raw_data;
tracing_generic_entry_update(&rec->ent, 0, 0);
rec->ent.type = sys_data->enter_id;
rec->ent.type = sys_data->enter_event->id;
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(flags);
}
int reg_prof_syscall_enter(char *name)
int prof_sysenter_enable(struct ftrace_event_call *call)
{
int ret = 0;
int num;
num = syscall_name_to_nr(name);
if (num < 0 || num >= NR_syscalls)
return -ENOSYS;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
if (!sys_prof_refcount_enter)
@@ -548,13 +525,11 @@ int reg_prof_syscall_enter(char *name)
return ret;
}
void unreg_prof_syscall_enter(char *name)
void prof_sysenter_disable(struct ftrace_event_call *call)
{
int num;
num = syscall_name_to_nr(name);
if (num < 0 || num >= NR_syscalls)
return;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
sys_prof_refcount_enter--;
@@ -570,7 +545,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
struct syscall_trace_exit *rec;
unsigned long flags;
int syscall_nr;
char *trace_buf;
char *raw_data;
int rctx;
int size;
int cpu;
@@ -596,17 +573,19 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(flags);
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
cpu = smp_processor_id();
if (in_nmi())
raw_data = rcu_dereference(trace_profile_buf_nmi);
else
raw_data = rcu_dereference(trace_profile_buf);
trace_buf = rcu_dereference(perf_trace_buf);
if (!raw_data)
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(raw_data, cpu);
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -614,24 +593,24 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
rec = (struct syscall_trace_exit *)raw_data;
tracing_generic_entry_update(&rec->ent, 0, 0);
rec->ent.type = sys_data->exit_id;
rec->ent.type = sys_data->exit_event->id;
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(flags);
}
int reg_prof_syscall_exit(char *name)
int prof_sysexit_enable(struct ftrace_event_call *call)
{
int ret = 0;
int num;
num = syscall_name_to_nr(name);
if (num < 0 || num >= NR_syscalls)
return -ENOSYS;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
if (!sys_prof_refcount_exit)
@@ -647,13 +626,11 @@ int reg_prof_syscall_exit(char *name)
return ret;
}
void unreg_prof_syscall_exit(char *name)
void prof_sysexit_disable(struct ftrace_event_call *call)
{
int num;
num = syscall_name_to_nr(name);
if (num < 0 || num >= NR_syscalls)
return;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
sys_prof_refcount_exit--;