Merge tag 'trace-v5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

Pull tracing updates from Steven Rostedt:
 "New tracing features:

   - The ring buffer is no longer disabled when reading the trace file.

     The trace_pipe file was made to be used for live tracing and
     reading as it acted like the normal producer/consumer. As the trace
     file would not consume the data, the easy way of handling it was to
     just disable writes to the ring buffer.

     This came as a surprise to the BPF folks who complained about lost
     events due to reading. This is no longer an issue. If someone wants
     to keep the old disabling there's a new option "pause-on-trace"
     that can be set.

   - New set_ftrace_notrace_pid file. PIDs in this file will not be
     traced by the function tracer.

     Similar to set_ftrace_pid, which makes the function tracer only
     trace those tasks with PIDs in the file, the set_ftrace_notrace_pid
     does the reverse.

   - New set_event_notrace_pid file. PIDs in this file will cause events
     not to be traced if triggered by a task with a matching PID.

     Similar to the set_event_pid file, except that tasks with PIDs in
     this file will not be traced. Note, sched_waking and sched_switch
     events may still be traced if one of the tasks referenced by those
     events has a PID that is allowed to be traced.

  Tracing related features:

   - New bootconfig option, that is attached to the initrd file.

     If bootconfig is on the command line, then the initrd file is
     searched looking for a bootconfig appended at the end.

   - New GPU tracepoint infrastructure to help the gfx drivers to get
     off debugfs (acked by Greg Kroah-Hartman)

  And other minor updates and fixes"

* tag 'trace-v5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (27 commits)
  tracing: Do not allocate buffer in trace_find_next_entry() in atomic
  tracing: Add documentation on set_ftrace_notrace_pid and set_event_notrace_pid
  selftests/ftrace: Add test to test new set_event_notrace_pid file
  selftests/ftrace: Add test to test new set_ftrace_notrace_pid file
  tracing: Create set_event_notrace_pid to not trace tasks
  ftrace: Create set_ftrace_notrace_pid to not trace tasks
  ftrace: Make function trace pid filtering a bit more exact
  ftrace/kprobe: Show the maxactive number on kprobe_events
  tracing: Have the document reflect that the trace file keeps tracing enabled
  ring-buffer/tracing: Have iterator acknowledge dropped events
  tracing: Do not disable tracing when reading the trace file
  ring-buffer: Do not disable recording when there is an iterator
  ring-buffer: Make resize disable per cpu buffer instead of total buffer
  ring-buffer: Optimize rb_iter_head_event()
  ring-buffer: Do not die if rb_iter_peek() fails more than thrice
  ring-buffer: Have rb_iter_head_event() handle concurrent writer
  ring-buffer: Add page_stamp to iterator for synchronization
  ring-buffer: Rename ring_buffer_read() to read_buffer_iter_advance()
  ring-buffer: Have ring_buffer_empty() not depend on tracing stopped
  tracing: Save off entry when peeking at next entry
  ...
This commit is contained in:
Linus Torvalds
2020-04-05 10:36:18 -07:00
28 changed files with 1196 additions and 258 deletions

View File

@@ -102,7 +102,7 @@ static bool ftrace_pids_enabled(struct ftrace_ops *ops)
tr = ops->private;
return tr->function_pids != NULL;
return tr->function_pids != NULL || tr->function_no_pids != NULL;
}
static void ftrace_update_trampoline(struct ftrace_ops *ops);
@@ -139,13 +139,23 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops)
#endif
}
/*
 * Sentinel values stored in the per-CPU ftrace_ignore_pid field in place of
 * a real pid. ftrace_pid_func() returns without tracing when it reads
 * FTRACE_PID_IGNORE, and traces without comparing against current->pid
 * when it reads FTRACE_PID_TRACE.
 */
#define FTRACE_PID_IGNORE -1
#define FTRACE_PID_TRACE -2
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *regs)
{
struct trace_array *tr = op->private;
int pid;
if (tr && this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid))
return;
if (tr) {
pid = this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid);
if (pid == FTRACE_PID_IGNORE)
return;
if (pid != FTRACE_PID_TRACE &&
pid != current->pid)
return;
}
op->saved_func(ip, parent_ip, op, regs);
}
@@ -6923,11 +6933,17 @@ ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
{
struct trace_array *tr = data;
struct trace_pid_list *pid_list;
struct trace_pid_list *no_pid_list;
pid_list = rcu_dereference_sched(tr->function_pids);
no_pid_list = rcu_dereference_sched(tr->function_no_pids);
this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
trace_ignore_this_task(pid_list, next));
if (trace_ignore_this_task(pid_list, no_pid_list, next))
this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
FTRACE_PID_IGNORE);
else
this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
next->pid);
}
static void
@@ -6940,6 +6956,9 @@ ftrace_pid_follow_sched_process_fork(void *data,
pid_list = rcu_dereference_sched(tr->function_pids);
trace_filter_add_remove_task(pid_list, self, task);
pid_list = rcu_dereference_sched(tr->function_no_pids);
trace_filter_add_remove_task(pid_list, self, task);
}
static void
@@ -6950,6 +6969,9 @@ ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task)
pid_list = rcu_dereference_sched(tr->function_pids);
trace_filter_add_remove_task(pid_list, NULL, task);
pid_list = rcu_dereference_sched(tr->function_no_pids);
trace_filter_add_remove_task(pid_list, NULL, task);
}
void ftrace_pid_follow_fork(struct trace_array *tr, bool enable)
@@ -6967,42 +6989,57 @@ void ftrace_pid_follow_fork(struct trace_array *tr, bool enable)
}
}
static void clear_ftrace_pids(struct trace_array *tr)
static void clear_ftrace_pids(struct trace_array *tr, int type)
{
struct trace_pid_list *pid_list;
struct trace_pid_list *no_pid_list;
int cpu;
pid_list = rcu_dereference_protected(tr->function_pids,
lockdep_is_held(&ftrace_lock));
if (!pid_list)
no_pid_list = rcu_dereference_protected(tr->function_no_pids,
lockdep_is_held(&ftrace_lock));
/* Make sure there's something to do */
if (!pid_type_enabled(type, pid_list, no_pid_list))
return;
unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
/* See if the pids still need to be checked after this */
if (!still_need_pid_events(type, pid_list, no_pid_list)) {
unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
for_each_possible_cpu(cpu)
per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = FTRACE_PID_TRACE;
}
for_each_possible_cpu(cpu)
per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = false;
if (type & TRACE_PIDS)
rcu_assign_pointer(tr->function_pids, NULL);
rcu_assign_pointer(tr->function_pids, NULL);
if (type & TRACE_NO_PIDS)
rcu_assign_pointer(tr->function_no_pids, NULL);
/* Wait till all users are no longer using pid filtering */
synchronize_rcu();
trace_free_pid_list(pid_list);
if ((type & TRACE_PIDS) && pid_list)
trace_free_pid_list(pid_list);
if ((type & TRACE_NO_PIDS) && no_pid_list)
trace_free_pid_list(no_pid_list);
}
void ftrace_clear_pids(struct trace_array *tr)
{
mutex_lock(&ftrace_lock);
clear_ftrace_pids(tr);
clear_ftrace_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
mutex_unlock(&ftrace_lock);
}
static void ftrace_pid_reset(struct trace_array *tr)
static void ftrace_pid_reset(struct trace_array *tr, int type)
{
mutex_lock(&ftrace_lock);
clear_ftrace_pids(tr);
clear_ftrace_pids(tr, type);
ftrace_update_pid_func();
ftrace_startup_all(0);
@@ -7066,9 +7103,45 @@ static const struct seq_operations ftrace_pid_sops = {
.show = fpid_show,
};
static int
ftrace_pid_open(struct inode *inode, struct file *file)
/*
 * seq_file ->start() callback of ftrace_no_pid_sops.
 *
 * Takes ftrace_lock and enters an RCU-sched read-side section; neither is
 * released here (presumably fpid_stop(), the ->stop() callback, drops
 * them -- it is not visible in this hunk, confirm there).
 *
 * When no "notrace" pid list is installed, the FTRACE_NO_PIDS marker is
 * returned for position 0 and NULL for every later position. Otherwise
 * the walk is handed to trace_pid_start().
 */
static void *fnpid_start(struct seq_file *m, loff_t *pos)
	__acquires(RCU)
{
	struct trace_array *tr = m->private;
	struct trace_pid_list *no_pids;

	mutex_lock(&ftrace_lock);
	rcu_read_lock_sched();

	no_pids = rcu_dereference_sched(tr->function_no_pids);
	if (no_pids)
		return trace_pid_start(no_pids, pos);

	/* Empty list: emit the marker once, then signal end of sequence */
	if (*pos)
		return NULL;

	return FTRACE_NO_PIDS;
}
/*
 * seq_file ->next() callback of ftrace_no_pid_sops.
 *
 * If @v is the FTRACE_NO_PIDS marker handed out by fnpid_start(), the
 * position is bumped and NULL is returned to end the sequence. Any other
 * @v is forwarded to trace_pid_next() together with the current
 * "notrace" pid list.
 */
static void *fnpid_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct trace_pid_list *no_pids;

	no_pids = rcu_dereference_sched(tr->function_no_pids);

	if (v != FTRACE_NO_PIDS)
		return trace_pid_next(no_pids, v, pos);

	/* The marker is the only entry; step past it and stop */
	(*pos)++;
	return NULL;
}
/*
 * seq_file operations selected by pid_open() for TRACE_NO_PIDS. Iteration
 * uses the fnpid_*() callbacks, which read tr->function_no_pids; the stop
 * and show callbacks are the same ones ftrace_pid_sops uses.
 */
static const struct seq_operations ftrace_no_pid_sops = {
.start = fnpid_start,
.next = fnpid_next,
.stop = fpid_stop,
.show = fpid_show,
};
static int pid_open(struct inode *inode, struct file *file, int type)
{
const struct seq_operations *seq_ops;
struct trace_array *tr = inode->i_private;
struct seq_file *m;
int ret = 0;
@@ -7079,9 +7152,18 @@ ftrace_pid_open(struct inode *inode, struct file *file)
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
ftrace_pid_reset(tr);
ftrace_pid_reset(tr, type);
ret = seq_open(file, &ftrace_pid_sops);
switch (type) {
case TRACE_PIDS:
seq_ops = &ftrace_pid_sops;
break;
case TRACE_NO_PIDS:
seq_ops = &ftrace_no_pid_sops;
break;
}
ret = seq_open(file, seq_ops);
if (ret < 0) {
trace_array_put(tr);
} else {
@@ -7093,10 +7175,23 @@ ftrace_pid_open(struct inode *inode, struct file *file)
return ret;
}
/*
 * Open handler that selects the TRACE_PIDS list; all of the work is done
 * by pid_open(), whose return value is passed straight back.
 */
static int
ftrace_pid_open(struct inode *inode, struct file *file)
{
return pid_open(inode, file, TRACE_PIDS);
}
/*
 * Open handler installed in ftrace_no_pid_fops: same as ftrace_pid_open()
 * but selects the TRACE_NO_PIDS list when calling pid_open().
 */
static int
ftrace_no_pid_open(struct inode *inode, struct file *file)
{
return pid_open(inode, file, TRACE_NO_PIDS);
}
static void ignore_task_cpu(void *data)
{
struct trace_array *tr = data;
struct trace_pid_list *pid_list;
struct trace_pid_list *no_pid_list;
/*
* This function is called by on_each_cpu() while the
@@ -7104,18 +7199,25 @@ static void ignore_task_cpu(void *data)
*/
pid_list = rcu_dereference_protected(tr->function_pids,
mutex_is_locked(&ftrace_lock));
no_pid_list = rcu_dereference_protected(tr->function_no_pids,
mutex_is_locked(&ftrace_lock));
this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
trace_ignore_this_task(pid_list, current));
if (trace_ignore_this_task(pid_list, no_pid_list, current))
this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
FTRACE_PID_IGNORE);
else
this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
current->pid);
}
static ssize_t
ftrace_pid_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
pid_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos, int type)
{
struct seq_file *m = filp->private_data;
struct trace_array *tr = m->private;
struct trace_pid_list *filtered_pids = NULL;
struct trace_pid_list *filtered_pids;
struct trace_pid_list *other_pids;
struct trace_pid_list *pid_list;
ssize_t ret;
@@ -7124,19 +7226,39 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
mutex_lock(&ftrace_lock);
filtered_pids = rcu_dereference_protected(tr->function_pids,
switch (type) {
case TRACE_PIDS:
filtered_pids = rcu_dereference_protected(tr->function_pids,
lockdep_is_held(&ftrace_lock));
other_pids = rcu_dereference_protected(tr->function_no_pids,
lockdep_is_held(&ftrace_lock));
break;
case TRACE_NO_PIDS:
filtered_pids = rcu_dereference_protected(tr->function_no_pids,
lockdep_is_held(&ftrace_lock));
other_pids = rcu_dereference_protected(tr->function_pids,
lockdep_is_held(&ftrace_lock));
break;
}
ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
if (ret < 0)
goto out;
rcu_assign_pointer(tr->function_pids, pid_list);
switch (type) {
case TRACE_PIDS:
rcu_assign_pointer(tr->function_pids, pid_list);
break;
case TRACE_NO_PIDS:
rcu_assign_pointer(tr->function_no_pids, pid_list);
break;
}
if (filtered_pids) {
synchronize_rcu();
trace_free_pid_list(filtered_pids);
} else if (pid_list) {
} else if (pid_list && !other_pids) {
/* Register a probe to set whether to ignore the tracing of a task */
register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
}
@@ -7159,6 +7281,20 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
return ret;
}
/*
 * Write handler that updates the TRACE_PIDS list; forwards its arguments
 * unchanged to pid_write() and returns whatever pid_write() returns.
 */
static ssize_t
ftrace_pid_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
return pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
}
/*
 * Write handler installed in ftrace_no_pid_fops: same as ftrace_pid_write()
 * but updates the TRACE_NO_PIDS list through pid_write().
 */
static ssize_t
ftrace_no_pid_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
return pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
}
static int
ftrace_pid_release(struct inode *inode, struct file *file)
{
@@ -7177,10 +7313,20 @@ static const struct file_operations ftrace_pid_fops = {
.release = ftrace_pid_release,
};
/*
 * File operations backing the "set_ftrace_notrace_pid" tracefs file (see
 * ftrace_init_tracefs()). Open and write go through the TRACE_NO_PIDS
 * wrappers; release is the same handler ftrace_pid_fops uses.
 */
static const struct file_operations ftrace_no_pid_fops = {
.open = ftrace_no_pid_open,
.write = ftrace_no_pid_write,
.read = seq_read,
.llseek = tracing_lseek,
.release = ftrace_pid_release,
};
/*
 * Create the function-tracer pid filter files for trace instance @tr
 * under the tracefs directory @d_tracer. Both files are mode 0644 and
 * carry @tr as their private data.
 */
void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	/* PIDs listed here are the only ones function-traced */
	trace_create_file("set_ftrace_pid", 0644, d_tracer, tr,
			  &ftrace_pid_fops);

	/* PIDs listed here are never function-traced */
	trace_create_file("set_ftrace_notrace_pid", 0644, d_tracer, tr,
			  &ftrace_no_pid_fops);
}
void __init ftrace_init_tracefs_toplevel(struct trace_array *tr,

View File

@@ -441,6 +441,7 @@ enum {
struct ring_buffer_per_cpu {
int cpu;
atomic_t record_disabled;
atomic_t resize_disabled;
struct trace_buffer *buffer;
raw_spinlock_t reader_lock; /* serialize readers */
arch_spinlock_t lock;
@@ -484,7 +485,6 @@ struct trace_buffer {
unsigned flags;
int cpus;
atomic_t record_disabled;
atomic_t resize_disabled;
cpumask_var_t cpumask;
struct lock_class_key *reader_lock_key;
@@ -503,10 +503,14 @@ struct trace_buffer {
/*
 * Read cursor over one per-CPU ring buffer. Events are handed to the
 * caller as copies held in @event rather than as pointers into the buffer
 * pages (see rb_iter_head_event()).
 */
struct ring_buffer_iter {
/* The per-CPU buffer this iterator walks */
struct ring_buffer_per_cpu *cpu_buffer;
/* Offset of the current event within head_page */
unsigned long head;
/*
 * Offset just past the event cached in @event. While it equals @head no
 * event is cached and rb_iter_head_event() must copy one.
 */
unsigned long next_event;
/* Page the iterator is currently reading from */
struct buffer_page *head_page;
/* head_page as recorded by rb_iter_reset() */
struct buffer_page *cache_reader_page;
/* cpu_buffer->read as recorded by rb_iter_reset() */
unsigned long cache_read;
/* Running time stamp for the events read so far */
u64 read_stamp;
/*
 * time_stamp of head_page when the iterator started on it; compared
 * against the page's current stamp to detect that the page changed.
 */
u64 page_stamp;
/* BUF_MAX_DATA_SIZE scratch buffer holding the copied event */
struct ring_buffer_event *event;
/*
 * Set to 1 when rb_iter_head_event() has to reset the iterator;
 * read and cleared by ring_buffer_iter_dropped().
 */
int missed_events;
};
/**
@@ -1737,18 +1741,24 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
size = nr_pages * BUF_PAGE_SIZE;
/*
* Don't succeed if resizing is disabled, as a reader might be
* manipulating the ring buffer and is expecting a sane state while
* this is true.
*/
if (atomic_read(&buffer->resize_disabled))
return -EBUSY;
/* prevent another thread from changing buffer sizes */
mutex_lock(&buffer->mutex);
if (cpu_id == RING_BUFFER_ALL_CPUS) {
/*
* Don't succeed if resizing is disabled, as a reader might be
* manipulating the ring buffer and is expecting a sane state while
* this is true.
*/
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
if (atomic_read(&cpu_buffer->resize_disabled)) {
err = -EBUSY;
goto out_err_unlock;
}
}
/* calculate the pages to update */
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
@@ -1816,6 +1826,16 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
if (nr_pages == cpu_buffer->nr_pages)
goto out;
/*
* Don't succeed if resizing is disabled, as a reader might be
* manipulating the ring buffer and is expecting a sane state while
* this is true.
*/
if (atomic_read(&cpu_buffer->resize_disabled)) {
err = -EBUSY;
goto out_err_unlock;
}
cpu_buffer->nr_pages_to_update = nr_pages -
cpu_buffer->nr_pages;
@@ -1885,6 +1905,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
free_buffer_page(bpage);
}
}
out_err_unlock:
mutex_unlock(&buffer->mutex);
return err;
}
@@ -1913,17 +1934,65 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->reader_page->read);
}
static __always_inline struct ring_buffer_event *
rb_iter_head_event(struct ring_buffer_iter *iter)
{
return __rb_page_index(iter->head_page, iter->head);
}
/* Read the commit counter of @bpage's data page (bpage->page->commit). */
static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
{
return local_read(&bpage->page->commit);
}
/*
 * rb_iter_head_event - return a private copy of the event at the iterator head
 * @iter: The ring buffer iterator
 *
 * If an event is already cached (iter->head != iter->next_event) the cached
 * copy in iter->event is returned. Otherwise the event at iter->head is
 * copied into iter->event, and the copy is accepted only if the page's time
 * stamp and commit counter show that the page was not changed during the copy.
 *
 * Returns iter->event on success. On a detected race the iterator is reset
 * to the start of its current head page, iter->missed_events is set, and
 * NULL is returned.
 */
static struct ring_buffer_event *
rb_iter_head_event(struct ring_buffer_iter *iter)
{
struct ring_buffer_event *event;
struct buffer_page *iter_head_page = iter->head_page;
unsigned long commit;
unsigned length;
/* An event is already cached for this head position */
if (iter->head != iter->next_event)
return iter->event;
/*
* When the writer goes across pages, it issues a cmpxchg which
* is a mb(), which will synchronize with the rmb here.
* (see rb_tail_page_update() and __rb_reserve_next())
*/
commit = rb_page_commit(iter_head_page);
smp_rmb();
event = __rb_page_index(iter_head_page, iter->head);
length = rb_event_length(event);
/*
* READ_ONCE() doesn't work on functions and we don't want the
* compiler doing any crazy optimizations with length.
*/
barrier();
/* Reject lengths that run past the commit or exceed the copy buffer */
if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE)
/* Writer corrupted the read? */
goto reset;
memcpy(iter->event, event, length);
/*
* If the page stamp is still the same after this rmb() then the
* event was safely copied without the writer entering the page.
*/
smp_rmb();
/* Make sure the page didn't change since we read this */
if (iter->page_stamp != iter_head_page->page->time_stamp ||
commit > rb_page_commit(iter_head_page))
goto reset;
/* Mark the copy as valid: next_event now differs from head */
iter->next_event = iter->head + length;
return iter->event;
reset:
/* Reset to the beginning */
iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
iter->head = 0;
iter->next_event = 0;
/* Picked up later by ring_buffer_iter_dropped() */
iter->missed_events = 1;
return NULL;
}
/* Size is determined by what has been committed */
static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
{
@@ -1959,8 +2028,9 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
else
rb_inc_page(cpu_buffer, &iter->head_page);
iter->read_stamp = iter->head_page->page->time_stamp;
iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
iter->head = 0;
iter->next_event = 0;
}
/*
@@ -3547,14 +3617,18 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
/* Iterator usage is expected to have record disabled */
iter->head_page = cpu_buffer->reader_page;
iter->head = cpu_buffer->reader_page->read;
iter->next_event = iter->head;
iter->cache_reader_page = iter->head_page;
iter->cache_read = cpu_buffer->read;
if (iter->head)
if (iter->head) {
iter->read_stamp = cpu_buffer->read_stamp;
else
iter->page_stamp = cpu_buffer->reader_page->page->time_stamp;
} else {
iter->read_stamp = iter->head_page->page->time_stamp;
iter->page_stamp = iter->read_stamp;
}
}
/**
@@ -3590,17 +3664,38 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
struct buffer_page *reader;
struct buffer_page *head_page;
struct buffer_page *commit_page;
struct buffer_page *curr_commit_page;
unsigned commit;
u64 curr_commit_ts;
u64 commit_ts;
cpu_buffer = iter->cpu_buffer;
/* Remember, trace recording is off when iterator is in use */
reader = cpu_buffer->reader_page;
head_page = cpu_buffer->head_page;
commit_page = cpu_buffer->commit_page;
commit = rb_page_commit(commit_page);
commit_ts = commit_page->page->time_stamp;
return ((iter->head_page == commit_page && iter->head == commit) ||
/*
* When the writer goes across pages, it issues a cmpxchg which
* is a mb(), which will synchronize with the rmb here.
* (see rb_tail_page_update())
*/
smp_rmb();
commit = rb_page_commit(commit_page);
/* We want to make sure that the commit page doesn't change */
smp_rmb();
/* Make sure commit page didn't change */
curr_commit_page = READ_ONCE(cpu_buffer->commit_page);
curr_commit_ts = READ_ONCE(curr_commit_page->page->time_stamp);
/* If the commit page changed, then there's more data */
if (curr_commit_page != commit_page ||
curr_commit_ts != commit_ts)
return 0;
/* Still racy, as it may return a false positive, but that's OK */
return ((iter->head_page == commit_page && iter->head >= commit) ||
(iter->head_page == reader && commit_page == head_page &&
head_page->read == commit &&
iter->head == rb_page_commit(cpu_buffer->reader_page)));
@@ -3828,15 +3923,22 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
static void rb_advance_iter(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
unsigned length;
cpu_buffer = iter->cpu_buffer;
/* If head == next_event then we need to jump to the next event */
if (iter->head == iter->next_event) {
/* If the event gets overwritten again, there's nothing to do */
if (rb_iter_head_event(iter) == NULL)
return;
}
iter->head = iter->next_event;
/*
* Check if we are at the end of the buffer.
*/
if (iter->head >= rb_page_size(iter->head_page)) {
if (iter->next_event >= rb_page_size(iter->head_page)) {
/* discarded commits can make the page empty */
if (iter->head_page == cpu_buffer->commit_page)
return;
@@ -3844,27 +3946,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
return;
}
event = rb_iter_head_event(iter);
length = rb_event_length(event);
/*
* This should not be called to advance the header if we are
* at the tail of the buffer.
*/
if (RB_WARN_ON(cpu_buffer,
(iter->head_page == cpu_buffer->commit_page) &&
(iter->head + length > rb_commit_index(cpu_buffer))))
return;
rb_update_iter_read_stamp(iter, event);
iter->head += length;
/* check for end of page padding */
if ((iter->head >= rb_page_size(iter->head_page)) &&
(iter->head_page != cpu_buffer->commit_page))
rb_inc_iter(iter);
rb_update_iter_read_stamp(iter, iter->event);
}
static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
@@ -3952,6 +4034,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
int nr_loops = 0;
bool failed = false;
if (ts)
*ts = 0;
@@ -3978,10 +4061,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
* to a data event, we should never loop more than three times.
* Once for going to next page, once on time extend, and
* finally once to get the event.
* (We never hit the following condition more than thrice).
* We should never hit the following condition more than thrice,
* unless the buffer is very small, and there's a writer
* that is causing the reader to fail getting an event.
*/
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3))
if (++nr_loops > 3) {
RB_WARN_ON(cpu_buffer, !failed);
return NULL;
}
if (rb_per_cpu_empty(cpu_buffer))
return NULL;
@@ -3992,6 +4079,10 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
}
event = rb_iter_head_event(iter);
if (!event) {
failed = true;
goto again;
}
switch (event->type_len) {
case RINGBUF_TYPE_PADDING:
@@ -4102,6 +4193,20 @@ ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts,
return event;
}
/**
 * ring_buffer_iter_dropped - report whether the iterator dropped events
 * @iter: The ring buffer iterator
 *
 * Returns true if the iterator's missed_events flag was set, i.e. events
 * were dropped since this function was last called. The flag is cleared
 * before returning, so each drop is reported only once.
 */
bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter)
{
	const bool dropped = !!iter->missed_events;

	iter->missed_events = 0;

	return dropped;
}
EXPORT_SYMBOL_GPL(ring_buffer_iter_dropped);
/**
* ring_buffer_iter_peek - peek at the next event to be read
* @iter: The ring buffer iterator
@@ -4208,16 +4313,21 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return NULL;
iter = kmalloc(sizeof(*iter), flags);
iter = kzalloc(sizeof(*iter), flags);
if (!iter)
return NULL;
iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags);
if (!iter->event) {
kfree(iter);
return NULL;
}
cpu_buffer = buffer->buffers[cpu];
iter->cpu_buffer = cpu_buffer;
atomic_inc(&buffer->resize_disabled);
atomic_inc(&cpu_buffer->record_disabled);
atomic_inc(&cpu_buffer->resize_disabled);
return iter;
}
@@ -4290,42 +4400,31 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter)
rb_check_pages(cpu_buffer);
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
atomic_dec(&cpu_buffer->record_disabled);
atomic_dec(&cpu_buffer->buffer->resize_disabled);
atomic_dec(&cpu_buffer->resize_disabled);
kfree(iter->event);
kfree(iter);
}
EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
/**
* ring_buffer_read - read the next item in the ring buffer by the iterator
* ring_buffer_iter_advance - advance the iterator to the next location
* @iter: The ring buffer iterator
* @ts: The time stamp of the event read.
*
* This reads the next event in the ring buffer and increments the iterator.
* Move the location of the iterator such that the next read will
* be the next location of the iterator.
*/
struct ring_buffer_event *
ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
void ring_buffer_iter_advance(struct ring_buffer_iter *iter)
{
struct ring_buffer_event *event;
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
unsigned long flags;
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
again:
event = rb_iter_peek(iter, ts);
if (!event)
goto out;
if (event->type_len == RINGBUF_TYPE_PADDING)
goto again;
rb_advance_iter(iter);
out:
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
return event;
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
EXPORT_SYMBOL_GPL(ring_buffer_read);
EXPORT_SYMBOL_GPL(ring_buffer_iter_advance);
/**
* ring_buffer_size - return the size of the ring buffer (in bytes)
@@ -4406,7 +4505,7 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return;
atomic_inc(&buffer->resize_disabled);
atomic_inc(&cpu_buffer->resize_disabled);
atomic_inc(&cpu_buffer->record_disabled);
/* Make sure all commits have finished */
@@ -4427,7 +4526,7 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
atomic_dec(&cpu_buffer->record_disabled);
atomic_dec(&buffer->resize_disabled);
atomic_dec(&cpu_buffer->resize_disabled);
}
EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);

View File

@@ -386,16 +386,22 @@ trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
* Returns false if @task should be traced.
*/
bool
trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
trace_ignore_this_task(struct trace_pid_list *filtered_pids,
struct trace_pid_list *filtered_no_pids,
struct task_struct *task)
{
/*
* Return false, because if filtered_pids does not exist,
* all pids are good to trace.
* If filtered_no_pids is not empty, and the task's pid is listed
* in filtered_no_pids, then return true.
* Otherwise, if filtered_pids is empty, that means we can
* trace all tasks. If it has content, then only trace pids
* within filtered_pids.
*/
if (!filtered_pids)
return false;
return !trace_find_filtered_pid(filtered_pids, task->pid);
return (filtered_pids &&
!trace_find_filtered_pid(filtered_pids, task->pid)) ||
(filtered_no_pids &&
trace_find_filtered_pid(filtered_no_pids, task->pid));
}
/**
@@ -3378,7 +3384,7 @@ static void trace_iterator_increment(struct trace_iterator *iter)
iter->idx++;
if (buf_iter)
ring_buffer_read(buf_iter, NULL);
ring_buffer_iter_advance(buf_iter);
}
static struct trace_entry *
@@ -3388,11 +3394,15 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
struct ring_buffer_event *event;
struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
if (buf_iter)
if (buf_iter) {
event = ring_buffer_iter_peek(buf_iter, ts);
else
if (lost_events)
*lost_events = ring_buffer_iter_dropped(buf_iter) ?
(unsigned long)-1 : 0;
} else {
event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
lost_events);
}
if (event) {
iter->ent_size = ring_buffer_event_length(event);
@@ -3462,11 +3472,51 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
return next;
}
/*
 * Fixed-size scratch buffer that ftrace_dump() installs as iter.temp,
 * because it can not use kmalloc. trace_find_next_entry() returns NULL
 * rather than copying an entry that is larger than this buffer.
 */
#define STATIC_TEMP_BUF_SIZE 128
static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
/* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
int *ent_cpu, u64 *ent_ts)
{
return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
/* __find_next_entry will reset ent_size */
int ent_size = iter->ent_size;
struct trace_entry *entry;
/*
* If called from ftrace_dump(), then the iter->temp buffer
* will be the static_temp_buf and not created from kmalloc.
* If the entry size is greater than the buffer, we can
* not save it. Just return NULL in that case. This is only
* used to add markers when two consecutive events' time
* stamps have a large delta. See trace_print_lat_context()
*/
if (iter->temp == static_temp_buf &&
STATIC_TEMP_BUF_SIZE < ent_size)
return NULL;
/*
* The __find_next_entry() may call peek_next_entry(), which may
* call ring_buffer_peek() that may make the contents of iter->ent
* undefined. Need to copy iter->ent now.
*/
if (iter->ent && iter->ent != iter->temp) {
if ((!iter->temp || iter->temp_size < iter->ent_size) &&
!WARN_ON_ONCE(iter->temp == static_temp_buf)) {
kfree(iter->temp);
iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
if (!iter->temp)
return NULL;
}
memcpy(iter->temp, iter->ent, iter->ent_size);
iter->temp_size = iter->ent_size;
iter->ent = iter->temp;
}
entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
/* Put back the original ent_size */
iter->ent_size = ent_size;
return entry;
}
/* Find the next real entry, and increment the iterator to the next entry */
@@ -3538,7 +3588,7 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
if (ts >= iter->array_buffer->time_start)
break;
entries++;
ring_buffer_read(buf_iter, NULL);
ring_buffer_iter_advance(buf_iter);
}
per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
@@ -3981,8 +4031,12 @@ enum print_line_t print_trace_line(struct trace_iterator *iter)
enum print_line_t ret;
if (iter->lost_events) {
trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
iter->cpu, iter->lost_events);
if (iter->lost_events == (unsigned long)-1)
trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
iter->cpu);
else
trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
iter->cpu, iter->lost_events);
if (trace_seq_has_overflowed(&iter->seq))
return TRACE_TYPE_PARTIAL_LINE;
}
@@ -4197,6 +4251,18 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
if (!iter->buffer_iter)
goto release;
/*
* trace_find_next_entry() may need to save off iter->ent.
* It will place it into the iter->temp buffer. As most
* events are less than 128, allocate a buffer of that size.
* If one is greater, then trace_find_next_entry() will
* allocate a new buffer to adjust for the bigger iter->ent.
* It's not critical if it fails to get allocated here.
*/
iter->temp = kmalloc(128, GFP_KERNEL);
if (iter->temp)
iter->temp_size = 128;
/*
* We make a copy of the current tracer to avoid concurrent
* changes on it while we are reading.
@@ -4237,8 +4303,11 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
if (trace_clocks[tr->clock_id].in_ns)
iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
/* stop the trace while dumping if we are not opening "snapshot" */
if (!iter->snapshot)
/*
* If pause-on-trace is enabled, then stop the trace while
* dumping, unless this is the "snapshot" file
*/
if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
tracing_stop_tr(tr);
if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
@@ -4269,6 +4338,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
fail:
mutex_unlock(&trace_types_lock);
kfree(iter->trace);
kfree(iter->temp);
kfree(iter->buffer_iter);
release:
seq_release_private(inode, file);
@@ -4334,7 +4404,7 @@ static int tracing_release(struct inode *inode, struct file *file)
if (iter->trace && iter->trace->close)
iter->trace->close(iter);
if (!iter->snapshot)
if (!iter->snapshot && tr->stop_count)
/* reenable tracing if it was previously enabled */
tracing_start_tr(tr);
@@ -4344,6 +4414,7 @@ static int tracing_release(struct inode *inode, struct file *file)
mutex_destroy(&iter->mutex);
free_cpumask_var(iter->started);
kfree(iter->temp);
kfree(iter->trace);
kfree(iter->buffer_iter);
seq_release_private(inode, file);
@@ -4964,6 +5035,8 @@ static const char readme_msg[] =
#ifdef CONFIG_FUNCTION_TRACER
" set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
"\t\t (function)\n"
" set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
"\t\t (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
" set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
@@ -9146,6 +9219,9 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
/* Simulate the iterator */
trace_init_global_iter(&iter);
/* Can not use kmalloc for iter.temp */
iter.temp = static_temp_buf;
iter.temp_size = STATIC_TEMP_BUF_SIZE;
for_each_tracing_cpu(cpu) {
atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
@@ -9334,7 +9410,7 @@ __init static int tracer_alloc_buffers(void)
goto out_free_buffer_mask;
/* Only allocate trace_printk buffers if a trace_printk exists */
if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
/* Must be called before global_trace.buffer is allocated */
trace_printk_init_buffers();

View File

@@ -178,10 +178,10 @@ struct trace_array_cpu {
kuid_t uid;
char comm[TASK_COMM_LEN];
bool ignore_pid;
#ifdef CONFIG_FUNCTION_TRACER
bool ftrace_ignore_pid;
int ftrace_ignore_pid;
#endif
bool ignore_pid;
};
struct tracer;
@@ -207,6 +207,30 @@ struct trace_pid_list {
unsigned long *pids;
};
/*
 * Bit flags naming which pid list(s) an operation applies to. They are
 * distinct bits, so callers may OR them together to act on both lists
 * at once (e.g. TRACE_PIDS | TRACE_NO_PIDS).
 */
enum {
TRACE_PIDS = BIT(0), /* selects the "pids" list (e.g. tr->filtered_pids) */
TRACE_NO_PIDS = BIT(1), /* selects the "no pids" list (e.g. tr->filtered_no_pids) */
};
/*
 * pid_type_enabled - check whether a list selected by @type is present
 * @type: mask of TRACE_PIDS and/or TRACE_NO_PIDS
 * @pid_list: the current "pids" list, or NULL if there is none
 * @no_pid_list: the current "no pids" list, or NULL if there is none
 *
 * Returns true if at least one of the lists named in @type is non-NULL.
 */
static inline bool pid_type_enabled(int type, struct trace_pid_list *pid_list,
				    struct trace_pid_list *no_pid_list)
{
	if ((type & TRACE_PIDS) && pid_list)
		return true;

	if ((type & TRACE_NO_PIDS) && no_pid_list)
		return true;

	return false;
}
/*
 * still_need_pid_events - check whether a list NOT named in @type remains
 * @type: mask of TRACE_PIDS and/or TRACE_NO_PIDS that is being turned off
 * @pid_list: the current "pids" list, or NULL if there is none
 * @no_pid_list: the current "no pids" list, or NULL if there is none
 *
 * Returns true if a list that is left out of @type is non-NULL, i.e. the
 * "other" list is still in place after the lists in @type are dropped.
 */
static inline bool still_need_pid_events(int type, struct trace_pid_list *pid_list,
					 struct trace_pid_list *no_pid_list)
{
	/* The "pids" list is not being turned off and it exists */
	if (pid_list && !(type & TRACE_PIDS))
		return true;

	/* The "no pids" list is not being turned off and it exists */
	if (no_pid_list && !(type & TRACE_NO_PIDS))
		return true;

	return false;
}
typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data);
/**
@@ -285,6 +309,7 @@ struct trace_array {
#endif
#endif
struct trace_pid_list __rcu *filtered_pids;
struct trace_pid_list __rcu *filtered_no_pids;
/*
* max_lock is used to protect the swapping of buffers
* when taking a max snapshot. The buffers themselves are
@@ -331,6 +356,7 @@ struct trace_array {
#ifdef CONFIG_FUNCTION_TRACER
struct ftrace_ops *ops;
struct trace_pid_list __rcu *function_pids;
struct trace_pid_list __rcu *function_no_pids;
#ifdef CONFIG_DYNAMIC_FTRACE
/* All of these are protected by the ftrace_lock */
struct list_head func_probes;
@@ -557,12 +583,7 @@ struct tracer {
* caller, and we can skip the current check.
*/
enum {
TRACE_BUFFER_BIT,
TRACE_BUFFER_NMI_BIT,
TRACE_BUFFER_IRQ_BIT,
TRACE_BUFFER_SIRQ_BIT,
/* Start of function recursion bits */
/* Function recursion bits */
TRACE_FTRACE_BIT,
TRACE_FTRACE_NMI_BIT,
TRACE_FTRACE_IRQ_BIT,
@@ -787,6 +808,7 @@ extern int pid_max;
bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
pid_t search_pid);
bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,
struct trace_pid_list *filtered_no_pids,
struct task_struct *task);
void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
struct task_struct *self,
@@ -1307,6 +1329,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
C(IRQ_INFO, "irq-info"), \
C(MARKERS, "markers"), \
C(EVENT_FORK, "event-fork"), \
C(PAUSE_ON_TRACE, "pause-on-trace"), \
FUNCTION_FLAGS \
FGRAPH_FLAGS \
STACK_FLAGS \

View File

@@ -325,14 +325,16 @@ FTRACE_ENTRY(hwlat, hwlat_entry,
__field_desc( long, timestamp, tv_nsec )
__field( unsigned int, nmi_count )
__field( unsigned int, seqnum )
__field( unsigned int, count )
),
F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llu\tnmi-ts:%llu\tnmi-count:%u\n",
F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llu\tcount:%d\tnmi-ts:%llu\tnmi-count:%u\n",
__entry->seqnum,
__entry->tv_sec,
__entry->tv_nsec,
__entry->duration,
__entry->outer_duration,
__entry->count,
__entry->nmi_total_ts,
__entry->nmi_count)
);

View File

@@ -232,10 +232,13 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
{
struct trace_array *tr = trace_file->tr;
struct trace_array_cpu *data;
struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
pid_list = rcu_dereference_raw(tr->filtered_pids);
if (!pid_list)
no_pid_list = rcu_dereference_raw(tr->filtered_no_pids);
if (!pid_list && !no_pid_list)
return false;
data = this_cpu_ptr(tr->array_buffer.data);
@@ -510,6 +513,9 @@ event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
pid_list = rcu_dereference_raw(tr->filtered_pids);
trace_filter_add_remove_task(pid_list, NULL, task);
pid_list = rcu_dereference_raw(tr->filtered_no_pids);
trace_filter_add_remove_task(pid_list, NULL, task);
}
static void
@@ -522,6 +528,9 @@ event_filter_pid_sched_process_fork(void *data,
pid_list = rcu_dereference_sched(tr->filtered_pids);
trace_filter_add_remove_task(pid_list, self, task);
pid_list = rcu_dereference_sched(tr->filtered_no_pids);
trace_filter_add_remove_task(pid_list, self, task);
}
void trace_event_follow_fork(struct trace_array *tr, bool enable)
@@ -544,13 +553,23 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
struct task_struct *prev, struct task_struct *next)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
bool ret;
pid_list = rcu_dereference_sched(tr->filtered_pids);
no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
this_cpu_write(tr->array_buffer.data->ignore_pid,
trace_ignore_this_task(pid_list, prev) &&
trace_ignore_this_task(pid_list, next));
/*
* Sched switch is funny, as we only want to ignore it
* in the notrace case if both prev and next should be ignored.
*/
ret = trace_ignore_this_task(NULL, no_pid_list, prev) &&
trace_ignore_this_task(NULL, no_pid_list, next);
this_cpu_write(tr->array_buffer.data->ignore_pid, ret ||
(trace_ignore_this_task(pid_list, NULL, prev) &&
trace_ignore_this_task(pid_list, NULL, next)));
}
static void
@@ -558,18 +577,21 @@ event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
struct task_struct *prev, struct task_struct *next)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
pid_list = rcu_dereference_sched(tr->filtered_pids);
no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
this_cpu_write(tr->array_buffer.data->ignore_pid,
trace_ignore_this_task(pid_list, next));
trace_ignore_this_task(pid_list, no_pid_list, next));
}
static void
event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
/* Nothing to do if we are already tracing */
@@ -577,15 +599,17 @@ event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
return;
pid_list = rcu_dereference_sched(tr->filtered_pids);
no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
this_cpu_write(tr->array_buffer.data->ignore_pid,
trace_ignore_this_task(pid_list, task));
trace_ignore_this_task(pid_list, no_pid_list, task));
}
static void
event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
/* Nothing to do if we are not tracing */
@@ -593,23 +617,15 @@ event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
return;
pid_list = rcu_dereference_sched(tr->filtered_pids);
no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
/* Set tracing if current is enabled */
this_cpu_write(tr->array_buffer.data->ignore_pid,
trace_ignore_this_task(pid_list, current));
trace_ignore_this_task(pid_list, no_pid_list, current));
}
static void __ftrace_clear_event_pids(struct trace_array *tr)
static void unregister_pid_events(struct trace_array *tr)
{
struct trace_pid_list *pid_list;
struct trace_event_file *file;
int cpu;
pid_list = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex));
if (!pid_list)
return;
unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
@@ -621,26 +637,55 @@ static void __ftrace_clear_event_pids(struct trace_array *tr)
unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
}
list_for_each_entry(file, &tr->events, list) {
clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
{
struct trace_pid_list *pid_list;
struct trace_pid_list *no_pid_list;
struct trace_event_file *file;
int cpu;
pid_list = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex));
no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
lockdep_is_held(&event_mutex));
/* Make sure there's something to do */
if (!pid_type_enabled(type, pid_list, no_pid_list))
return;
if (!still_need_pid_events(type, pid_list, no_pid_list)) {
unregister_pid_events(tr);
list_for_each_entry(file, &tr->events, list) {
clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
}
for_each_possible_cpu(cpu)
per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
}
for_each_possible_cpu(cpu)
per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
if (type & TRACE_PIDS)
rcu_assign_pointer(tr->filtered_pids, NULL);
rcu_assign_pointer(tr->filtered_pids, NULL);
if (type & TRACE_NO_PIDS)
rcu_assign_pointer(tr->filtered_no_pids, NULL);
/* Wait till all users are no longer using pid filtering */
tracepoint_synchronize_unregister();
trace_free_pid_list(pid_list);
if ((type & TRACE_PIDS) && pid_list)
trace_free_pid_list(pid_list);
if ((type & TRACE_NO_PIDS) && no_pid_list)
trace_free_pid_list(no_pid_list);
}
static void ftrace_clear_event_pids(struct trace_array *tr)
static void ftrace_clear_event_pids(struct trace_array *tr, int type)
{
mutex_lock(&event_mutex);
__ftrace_clear_event_pids(tr);
__ftrace_clear_event_pids(tr, type);
mutex_unlock(&event_mutex);
}
@@ -1013,15 +1058,32 @@ static void t_stop(struct seq_file *m, void *p)
}
static void *
p_next(struct seq_file *m, void *v, loff_t *pos)
__next(struct seq_file *m, void *v, loff_t *pos, int type)
{
struct trace_array *tr = m->private;
struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
struct trace_pid_list *pid_list;
if (type == TRACE_PIDS)
pid_list = rcu_dereference_sched(tr->filtered_pids);
else
pid_list = rcu_dereference_sched(tr->filtered_no_pids);
return trace_pid_next(pid_list, v, pos);
}
static void *p_start(struct seq_file *m, loff_t *pos)
/*
 * Step to the next entry of the "pids" list: thin wrapper that calls
 * __next() with TRACE_PIDS, which makes it walk tr->filtered_pids.
 */
static void *
p_next(struct seq_file *m, void *v, loff_t *pos)
{
return __next(m, v, pos, TRACE_PIDS);
}
/*
 * Step to the next entry of the "no pids" list: thin wrapper that calls
 * __next() with TRACE_NO_PIDS, which makes it walk tr->filtered_no_pids.
 * Used as the ->next callback of show_set_no_pid_seq_ops.
 */
static void *
np_next(struct seq_file *m, void *v, loff_t *pos)
{
return __next(m, v, pos, TRACE_NO_PIDS);
}
static void *__start(struct seq_file *m, loff_t *pos, int type)
__acquires(RCU)
{
struct trace_pid_list *pid_list;
@@ -1036,7 +1098,10 @@ static void *p_start(struct seq_file *m, loff_t *pos)
mutex_lock(&event_mutex);
rcu_read_lock_sched();
pid_list = rcu_dereference_sched(tr->filtered_pids);
if (type == TRACE_PIDS)
pid_list = rcu_dereference_sched(tr->filtered_pids);
else
pid_list = rcu_dereference_sched(tr->filtered_no_pids);
if (!pid_list)
return NULL;
@@ -1044,6 +1109,18 @@ static void *p_start(struct seq_file *m, loff_t *pos)
return trace_pid_start(pid_list, pos);
}
/*
 * Begin iterating the "pids" list: thin wrapper that calls __start()
 * with TRACE_PIDS so that tr->filtered_pids is the list walked.
 * Annotated __acquires(RCU) because the read-side section entered in
 * __start() is still held on return; p_stop() carries the matching
 * __releases(RCU) annotation.
 */
static void *p_start(struct seq_file *m, loff_t *pos)
__acquires(RCU)
{
return __start(m, pos, TRACE_PIDS);
}
/*
 * Begin iterating the "no pids" list: thin wrapper that calls __start()
 * with TRACE_NO_PIDS so that tr->filtered_no_pids is the list walked.
 * Used as the ->start callback of show_set_no_pid_seq_ops, whose ->stop
 * callback is p_stop(). Annotated __acquires(RCU) because the read-side
 * section entered in __start() is still held on return.
 */
static void *np_start(struct seq_file *m, loff_t *pos)
__acquires(RCU)
{
return __start(m, pos, TRACE_NO_PIDS);
}
static void p_stop(struct seq_file *m, void *p)
__releases(RCU)
{
@@ -1588,6 +1665,7 @@ static void ignore_task_cpu(void *data)
{
struct trace_array *tr = data;
struct trace_pid_list *pid_list;
struct trace_pid_list *no_pid_list;
/*
* This function is called by on_each_cpu() while the
@@ -1595,18 +1673,50 @@ static void ignore_task_cpu(void *data)
*/
pid_list = rcu_dereference_protected(tr->filtered_pids,
mutex_is_locked(&event_mutex));
no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
mutex_is_locked(&event_mutex));
this_cpu_write(tr->array_buffer.data->ignore_pid,
trace_ignore_this_task(pid_list, current));
trace_ignore_this_task(pid_list, no_pid_list, current));
}
/*
 * Attach the pid-filtering probes for @tr to the sched_switch,
 * sched_wakeup, sched_wakeup_new and sched_waking tracepoints.
 * Each tracepoint gets a "_pre" probe registered with priority INT_MAX
 * and a "_post" probe registered with priority 0; @tr is passed as the
 * probe data in every case.
 */
static void register_pid_events(struct trace_array *tr)
{
/*
* Register a probe that is called before all other probes
* to set ignore_pid if next or prev do not match.
* Register a probe that is called after all other probes
* to only keep ignore_pid set if next pid matches.
*/
register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
tr, INT_MAX);
register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
tr, 0);
/* The three wakeup-style tracepoints all share the same pre/post probe pair */
register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
tr, INT_MAX);
register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
tr, 0);
register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
tr, INT_MAX);
register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
tr, 0);
register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
tr, INT_MAX);
register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
tr, 0);
}
static ssize_t
ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
event_pid_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos, int type)
{
struct seq_file *m = filp->private_data;
struct trace_array *tr = m->private;
struct trace_pid_list *filtered_pids = NULL;
struct trace_pid_list *other_pids = NULL;
struct trace_pid_list *pid_list;
struct trace_event_file *file;
ssize_t ret;
@@ -1620,14 +1730,26 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
mutex_lock(&event_mutex);
filtered_pids = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex));
if (type == TRACE_PIDS) {
filtered_pids = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex));
other_pids = rcu_dereference_protected(tr->filtered_no_pids,
lockdep_is_held(&event_mutex));
} else {
filtered_pids = rcu_dereference_protected(tr->filtered_no_pids,
lockdep_is_held(&event_mutex));
other_pids = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex));
}
ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
if (ret < 0)
goto out;
rcu_assign_pointer(tr->filtered_pids, pid_list);
if (type == TRACE_PIDS)
rcu_assign_pointer(tr->filtered_pids, pid_list);
else
rcu_assign_pointer(tr->filtered_no_pids, pid_list);
list_for_each_entry(file, &tr->events, list) {
set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
@@ -1636,32 +1758,8 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
if (filtered_pids) {
tracepoint_synchronize_unregister();
trace_free_pid_list(filtered_pids);
} else if (pid_list) {
/*
* Register a probe that is called before all other probes
* to set ignore_pid if next or prev do not match.
* Register a probe this is called after all other probes
* to only keep ignore_pid set if next pid matches.
*/
register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
tr, INT_MAX);
register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
tr, 0);
register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
tr, INT_MAX);
register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
tr, 0);
register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
tr, INT_MAX);
register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
tr, 0);
register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
tr, INT_MAX);
register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
tr, 0);
} else if (pid_list && !other_pids) {
register_pid_events(tr);
}
/*
@@ -1680,9 +1778,24 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
return ret;
}
/*
 * Write handler for the "pids" list: forwards the user buffer unchanged
 * to event_pid_write() with type TRACE_PIDS, so the write updates
 * tr->filtered_pids. Returns whatever event_pid_write() returns.
 */
static ssize_t
ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
}
/*
 * Write handler for the "no pids" list: forwards the user buffer
 * unchanged to event_pid_write() with type TRACE_NO_PIDS, so the write
 * updates tr->filtered_no_pids. Installed as the ->write callback of
 * ftrace_set_event_notrace_pid_fops. Returns whatever event_pid_write()
 * returns.
 */
static ssize_t
ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
}
static int ftrace_event_avail_open(struct inode *inode, struct file *file);
static int ftrace_event_set_open(struct inode *inode, struct file *file);
static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
static int ftrace_event_release(struct inode *inode, struct file *file);
static const struct seq_operations show_event_seq_ops = {
@@ -1706,6 +1819,13 @@ static const struct seq_operations show_set_pid_seq_ops = {
.stop = p_stop,
};
/*
 * seq_file operations for reading the "no pids" list. Start and next are
 * the TRACE_NO_PIDS variants (np_start/np_next); show and stop are
 * trace_pid_show() and p_stop(), which do not depend on the list type.
 */
static const struct seq_operations show_set_no_pid_seq_ops = {
.start = np_start,
.next = np_next,
.show = trace_pid_show,
.stop = p_stop,
};
static const struct file_operations ftrace_avail_fops = {
.open = ftrace_event_avail_open,
.read = seq_read,
@@ -1729,6 +1849,14 @@ static const struct file_operations ftrace_set_event_pid_fops = {
.release = ftrace_event_release,
};
/*
 * File operations for the "set_event_notrace_pid" tracefs file: opened
 * via ftrace_event_set_npid_open(), read through the seq_file helpers,
 * and written through ftrace_event_npid_write().
 */
static const struct file_operations ftrace_set_event_notrace_pid_fops = {
.open = ftrace_event_set_npid_open,
.read = seq_read,
.write = ftrace_event_npid_write,
.llseek = seq_lseek,
.release = ftrace_event_release,
};
static const struct file_operations ftrace_enable_fops = {
.open = tracing_open_generic,
.read = event_enable_read,
@@ -1858,7 +1986,28 @@ ftrace_event_set_pid_open(struct inode *inode, struct file *file)
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
ftrace_clear_event_pids(tr);
ftrace_clear_event_pids(tr, TRACE_PIDS);
ret = ftrace_event_open(inode, file, seq_ops);
if (ret < 0)
trace_array_put(tr);
return ret;
}
static int
ftrace_event_set_npid_open(struct inode *inode, struct file *file)
{
const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops;
struct trace_array *tr = inode->i_private;
int ret;
ret = tracing_check_open_get_tr(tr);
if (ret)
return ret;
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
ftrace_clear_event_pids(tr, TRACE_NO_PIDS);
ret = ftrace_event_open(inode, file, seq_ops);
if (ret < 0)
@@ -3075,6 +3224,11 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
if (!entry)
pr_warn("Could not create tracefs 'set_event_pid' entry\n");
entry = tracefs_create_file("set_event_notrace_pid", 0644, parent,
tr, &ftrace_set_event_notrace_pid_fops);
if (!entry)
pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n");
/* ring buffer internal formats */
entry = trace_create_file("header_page", 0444, d_events,
ring_buffer_print_page_header,
@@ -3158,7 +3312,7 @@ int event_trace_del_tracer(struct trace_array *tr)
clear_event_triggers(tr);
/* Clear the pid list */
__ftrace_clear_event_pids(tr);
__ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
/* Disable any running events */
__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);

View File

@@ -482,7 +482,7 @@ get_return_for_leaf(struct trace_iterator *iter,
/* this is a leaf, now advance the iterator */
if (ring_iter)
ring_buffer_read(ring_iter, NULL);
ring_buffer_iter_advance(ring_iter);
return next;
}

View File

@@ -83,6 +83,7 @@ struct hwlat_sample {
u64 nmi_total_ts; /* Total time spent in NMIs */
struct timespec64 timestamp; /* wall time */
int nmi_count; /* # NMIs during this sample */
int count; /* # of iterations over thresh */
};
/* keep the global state somewhere. */
@@ -124,6 +125,7 @@ static void trace_hwlat_sample(struct hwlat_sample *sample)
entry->timestamp = sample->timestamp;
entry->nmi_total_ts = sample->nmi_total_ts;
entry->nmi_count = sample->nmi_count;
entry->count = sample->count;
if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit_nostack(buffer, event);
@@ -167,12 +169,14 @@ void trace_hwlat_callback(bool enter)
static int get_sample(void)
{
struct trace_array *tr = hwlat_trace;
struct hwlat_sample s;
time_type start, t1, t2, last_t2;
s64 diff, total, last_total = 0;
s64 diff, outer_diff, total, last_total = 0;
u64 sample = 0;
u64 thresh = tracing_thresh;
u64 outer_sample = 0;
int ret = -1;
unsigned int count = 0;
do_div(thresh, NSEC_PER_USEC); /* modifies interval value */
@@ -186,6 +190,7 @@ static int get_sample(void)
init_time(last_t2, 0);
start = time_get(); /* start timestamp */
outer_diff = 0;
do {
@@ -194,14 +199,14 @@ static int get_sample(void)
if (time_u64(last_t2)) {
/* Check the delta from outer loop (t2 to next t1) */
diff = time_to_us(time_sub(t1, last_t2));
outer_diff = time_to_us(time_sub(t1, last_t2));
/* This shouldn't happen */
if (diff < 0) {
if (outer_diff < 0) {
pr_err(BANNER "time running backwards\n");
goto out;
}
if (diff > outer_sample)
outer_sample = diff;
if (outer_diff > outer_sample)
outer_sample = outer_diff;
}
last_t2 = t2;
@@ -217,6 +222,12 @@ static int get_sample(void)
/* This checks the inner loop (t1 to t2) */
diff = time_to_us(time_sub(t2, t1)); /* current diff */
if (diff > thresh || outer_diff > thresh) {
if (!count)
ktime_get_real_ts64(&s.timestamp);
count++;
}
/* This shouldn't happen */
if (diff < 0) {
pr_err(BANNER "time running backwards\n");
@@ -236,7 +247,6 @@ static int get_sample(void)
/* If we exceed the threshold value, we have found a hardware latency */
if (sample > thresh || outer_sample > thresh) {
struct hwlat_sample s;
u64 latency;
ret = 1;
@@ -249,9 +259,9 @@ static int get_sample(void)
s.seqnum = hwlat_data.count;
s.duration = sample;
s.outer_duration = outer_sample;
ktime_get_real_ts64(&s.timestamp);
s.nmi_total_ts = nmi_total_ts;
s.nmi_count = nmi_count;
s.count = count;
trace_hwlat_sample(&s);
latency = max(sample, outer_sample);

View File

@@ -1078,6 +1078,8 @@ static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
int i;
seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
if (trace_kprobe_is_return(tk) && tk->rp.maxactive)
seq_printf(m, "%d", tk->rp.maxactive);
seq_printf(m, ":%s/%s", trace_probe_group_name(&tk->tp),
trace_probe_name(&tk->tp));

View File

@@ -617,22 +617,19 @@ int trace_print_context(struct trace_iterator *iter)
int trace_print_lat_context(struct trace_iterator *iter)
{
struct trace_entry *entry, *next_entry;
struct trace_array *tr = iter->tr;
/* trace_find_next_entry will reset ent_size */
int ent_size = iter->ent_size;
struct trace_seq *s = &iter->seq;
u64 next_ts;
struct trace_entry *entry = iter->ent,
*next_entry = trace_find_next_entry(iter, NULL,
&next_ts);
unsigned long verbose = (tr->trace_flags & TRACE_ITER_VERBOSE);
u64 next_ts;
/* Restore the original ent_size */
iter->ent_size = ent_size;
next_entry = trace_find_next_entry(iter, NULL, &next_ts);
if (!next_entry)
next_ts = iter->ts;
/* trace_find_next_entry() may change iter->ent */
entry = iter->ent;
if (verbose) {
char comm[TASK_COMM_LEN];
@@ -1158,12 +1155,12 @@ trace_hwlat_print(struct trace_iterator *iter, int flags,
trace_assign_type(field, entry);
trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%lld.%09ld",
trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%lld.%09ld count:%d",
field->seqnum,
field->duration,
field->outer_duration,
(long long)field->timestamp.tv_sec,
field->timestamp.tv_nsec);
field->timestamp.tv_nsec, field->count);
if (field->nmi_count) {
/*