Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Features: - Add "uretprobes" - an optimization to uprobes, like kretprobes are an optimization to kprobes. "perf probe -x file sym%return" now works like kretprobes. By Oleg Nesterov. - Introduce per core aggregation in 'perf stat', from Stephane Eranian. - Add memory profiling via PEBS, from Stephane Eranian. - Event group view for 'annotate' in --stdio, --tui and --gtk, from Namhyung Kim. - Add support for AMD NB and L2I "uncore" counters, by Jacob Shin. - Add Ivy Bridge-EP uncore support, by Zheng Yan - IBM zEnterprise EC12 oprofile support patchlet from Robert Richter. - Add perf test entries for checking breakpoint overflow signal handler issues, from Jiri Olsa. - Add perf test entry for for checking number of EXIT events, from Namhyung Kim. - Add perf test entries for checking --cpu in record and stat, from Jiri Olsa. - Introduce perf stat --repeat forever, from Frederik Deweerdt. - Add --no-demangle to report/top, from Namhyung Kim. - PowerPC fixes plus a couple of cleanups/optimizations in uprobes and trace_uprobes, by Oleg Nesterov. Various fixes and refactorings: - Fix dependency of the python binding wrt libtraceevent, from Naohiro Aota. - Simplify some perf_evlist methods and to allow 'stat' to share code with 'record' and 'trace', by Arnaldo Carvalho de Melo. - Remove dead code in related to libtraceevent integration, from Namhyung Kim. - Revert "perf sched: Handle PERF_RECORD_EXIT events" to get 'perf sched lat' back working, by Arnaldo Carvalho de Melo - We don't use Newt anymore, just plain libslang, by Arnaldo Carvalho de Melo. - Kill a bunch of die() calls, from Namhyung Kim. - Fix build on non-glibc systems due to libio.h absence, from Cody P Schafer. - Remove some perf_session and tracing dead code, from David Ahern. - Honor parallel jobs, fix from Borislav Petkov - Introduce tools/lib/lk library, initially just removing duplication among tools/perf and tools/vm. from Borislav Petkov ... and many more I missed to list, see the shortlog and git log for more details." * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (136 commits) perf/x86/intel/P4: Robistify P4 PMU types perf/x86/amd: Fix AMD NB and L2I "uncore" support perf/x86/amd: Remove old-style NB counter support from perf_event_amd.c perf/x86: Check all MSRs before passing hw check perf/x86/amd: Add support for AMD NB and L2I "uncore" counters perf/x86/intel: Add Ivy Bridge-EP uncore support perf/x86/intel: Fix SNB-EP CBO and PCU uncore PMU filter management perf/x86: Avoid kfree() in CPU_{STARTING,DYING} uprobes/perf: Avoid perf_trace_buf_prepare/submit if ->perf_events is empty uprobes/tracing: Don't pass addr=ip to perf_trace_buf_submit() uprobes/tracing: Change create_trace_uprobe() to support uretprobes uprobes/tracing: Make seq_printf() code uretprobe-friendly uprobes/tracing: Make register_uprobe_event() paths uretprobe-friendly uprobes/tracing: Make uprobe_{trace,perf}_print() uretprobe-friendly uprobes/tracing: Introduce is_ret_probe() and uretprobe_dispatcher() uprobes/tracing: Introduce uprobe_{trace,perf}_print() helpers uprobes/tracing: Generalize struct uprobe_trace_entry_head uprobes/tracing: Kill the pointless local_save_flags/preempt_count calls uprobes/tracing: Kill the pointless seq_print_ip_sym() call uprobes/tracing: Kill the pointless task_pt_regs() calls ...
This commit is contained in:
@@ -37,6 +37,7 @@
|
||||
#include <linux/ftrace_event.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/cgroup.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@@ -233,6 +234,20 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
|
||||
|
||||
#ifdef CONFIG_CGROUP_PERF
|
||||
|
||||
/*
|
||||
* perf_cgroup_info keeps track of time_enabled for a cgroup.
|
||||
* This is a per-cpu dynamically allocated data structure.
|
||||
*/
|
||||
struct perf_cgroup_info {
|
||||
u64 time;
|
||||
u64 timestamp;
|
||||
};
|
||||
|
||||
struct perf_cgroup {
|
||||
struct cgroup_subsys_state css;
|
||||
struct perf_cgroup_info __percpu *info;
|
||||
};
|
||||
|
||||
/*
|
||||
* Must ensure cgroup is pinned (css_get) before calling
|
||||
* this function. In other words, we cannot call this function
|
||||
@@ -976,9 +991,15 @@ static void perf_event__header_size(struct perf_event *event)
|
||||
if (sample_type & PERF_SAMPLE_PERIOD)
|
||||
size += sizeof(data->period);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT)
|
||||
size += sizeof(data->weight);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_READ)
|
||||
size += event->read_size;
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
size += sizeof(data->data_src.val);
|
||||
|
||||
event->header_size = size;
|
||||
}
|
||||
|
||||
@@ -4193,6 +4214,12 @@ void perf_output_sample(struct perf_output_handle *handle,
|
||||
perf_output_sample_ustack(handle,
|
||||
data->stack_user_size,
|
||||
data->regs_user.regs);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT)
|
||||
perf_output_put(handle, data->weight);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
perf_output_put(handle, data->data_src.val);
|
||||
}
|
||||
|
||||
void perf_prepare_sample(struct perf_event_header *header,
|
||||
@@ -4782,6 +4809,9 @@ got_name:
|
||||
mmap_event->file_name = name;
|
||||
mmap_event->file_size = size;
|
||||
|
||||
if (!(vma->vm_flags & VM_EXEC))
|
||||
mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
|
||||
|
||||
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
|
||||
|
||||
rcu_read_lock();
|
||||
|
@@ -75,6 +75,15 @@ struct uprobe {
|
||||
struct arch_uprobe arch;
|
||||
};
|
||||
|
||||
struct return_instance {
|
||||
struct uprobe *uprobe;
|
||||
unsigned long func;
|
||||
unsigned long orig_ret_vaddr; /* original return address */
|
||||
bool chained; /* true, if instance is nested */
|
||||
|
||||
struct return_instance *next; /* keep as stack */
|
||||
};
|
||||
|
||||
/*
|
||||
* valid_vma: Verify if the specified vma is an executable vma
|
||||
* Relax restrictions while unregistering: vm_flags might have
|
||||
@@ -173,10 +182,31 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)
|
||||
return *insn == UPROBE_SWBP_INSN;
|
||||
}
|
||||
|
||||
static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode)
|
||||
/**
|
||||
* is_trap_insn - check if instruction is breakpoint instruction.
|
||||
* @insn: instruction to be checked.
|
||||
* Default implementation of is_trap_insn
|
||||
* Returns true if @insn is a breakpoint instruction.
|
||||
*
|
||||
* This function is needed for the case where an architecture has multiple
|
||||
* trap instructions (like powerpc).
|
||||
*/
|
||||
bool __weak is_trap_insn(uprobe_opcode_t *insn)
|
||||
{
|
||||
return is_swbp_insn(insn);
|
||||
}
|
||||
|
||||
static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len)
|
||||
{
|
||||
void *kaddr = kmap_atomic(page);
|
||||
memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE);
|
||||
memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len);
|
||||
kunmap_atomic(kaddr);
|
||||
}
|
||||
|
||||
static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len)
|
||||
{
|
||||
void *kaddr = kmap_atomic(page);
|
||||
memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
|
||||
kunmap_atomic(kaddr);
|
||||
}
|
||||
|
||||
@@ -185,7 +215,16 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
|
||||
uprobe_opcode_t old_opcode;
|
||||
bool is_swbp;
|
||||
|
||||
copy_opcode(page, vaddr, &old_opcode);
|
||||
/*
|
||||
* Note: We only check if the old_opcode is UPROBE_SWBP_INSN here.
|
||||
* We do not check if it is any other 'trap variant' which could
|
||||
* be conditional trap instruction such as the one powerpc supports.
|
||||
*
|
||||
* The logic is that we do not care if the underlying instruction
|
||||
* is a trap variant; uprobes always wins over any other (gdb)
|
||||
* breakpoint.
|
||||
*/
|
||||
copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE);
|
||||
is_swbp = is_swbp_insn(&old_opcode);
|
||||
|
||||
if (is_swbp_insn(new_opcode)) {
|
||||
@@ -204,7 +243,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
|
||||
* Expect the breakpoint instruction to be the smallest size instruction for
|
||||
* the architecture. If an arch has variable length instruction and the
|
||||
* breakpoint instruction is not of the smallest length instruction
|
||||
* supported by that architecture then we need to modify is_swbp_at_addr and
|
||||
* supported by that architecture then we need to modify is_trap_at_addr and
|
||||
* write_opcode accordingly. This would never be a problem for archs that
|
||||
* have fixed length instructions.
|
||||
*/
|
||||
@@ -225,7 +264,6 @@ static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
|
||||
uprobe_opcode_t opcode)
|
||||
{
|
||||
struct page *old_page, *new_page;
|
||||
void *vaddr_old, *vaddr_new;
|
||||
struct vm_area_struct *vma;
|
||||
int ret;
|
||||
|
||||
@@ -246,15 +284,8 @@ retry:
|
||||
|
||||
__SetPageUptodate(new_page);
|
||||
|
||||
/* copy the page now that we've got it stable */
|
||||
vaddr_old = kmap_atomic(old_page);
|
||||
vaddr_new = kmap_atomic(new_page);
|
||||
|
||||
memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
|
||||
memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
|
||||
|
||||
kunmap_atomic(vaddr_new);
|
||||
kunmap_atomic(vaddr_old);
|
||||
copy_highpage(new_page, old_page);
|
||||
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
|
||||
|
||||
ret = anon_vma_prepare(vma);
|
||||
if (ret)
|
||||
@@ -477,30 +508,18 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
|
||||
unsigned long nbytes, loff_t offset)
|
||||
{
|
||||
struct page *page;
|
||||
void *vaddr;
|
||||
unsigned long off;
|
||||
pgoff_t idx;
|
||||
|
||||
if (!filp)
|
||||
return -EINVAL;
|
||||
|
||||
if (!mapping->a_ops->readpage)
|
||||
return -EIO;
|
||||
|
||||
idx = offset >> PAGE_CACHE_SHIFT;
|
||||
off = offset & ~PAGE_MASK;
|
||||
|
||||
/*
|
||||
* Ensure that the page that has the original instruction is
|
||||
* populated and in page-cache.
|
||||
*/
|
||||
page = read_mapping_page(mapping, idx, filp);
|
||||
page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
|
||||
if (IS_ERR(page))
|
||||
return PTR_ERR(page);
|
||||
|
||||
vaddr = kmap_atomic(page);
|
||||
memcpy(insn, vaddr + off, nbytes);
|
||||
kunmap_atomic(vaddr);
|
||||
copy_from_page(page, offset, insn, nbytes);
|
||||
page_cache_release(page);
|
||||
|
||||
return 0;
|
||||
@@ -550,7 +569,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
|
||||
goto out;
|
||||
|
||||
ret = -ENOTSUPP;
|
||||
if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
|
||||
if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn))
|
||||
goto out;
|
||||
|
||||
ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
|
||||
@@ -758,7 +777,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
|
||||
down_write(&mm->mmap_sem);
|
||||
vma = find_vma(mm, info->vaddr);
|
||||
if (!vma || !valid_vma(vma, is_register) ||
|
||||
vma->vm_file->f_mapping->host != uprobe->inode)
|
||||
file_inode(vma->vm_file) != uprobe->inode)
|
||||
goto unlock;
|
||||
|
||||
if (vma->vm_start > info->vaddr ||
|
||||
@@ -828,6 +847,10 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
|
||||
struct uprobe *uprobe;
|
||||
int ret;
|
||||
|
||||
/* Uprobe must have at least one set consumer */
|
||||
if (!uc->handler && !uc->ret_handler)
|
||||
return -EINVAL;
|
||||
|
||||
/* Racy, just to catch the obvious mistakes */
|
||||
if (offset > i_size_read(inode))
|
||||
return -EINVAL;
|
||||
@@ -917,7 +940,7 @@ static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
|
||||
loff_t offset;
|
||||
|
||||
if (!valid_vma(vma, false) ||
|
||||
vma->vm_file->f_mapping->host != uprobe->inode)
|
||||
file_inode(vma->vm_file) != uprobe->inode)
|
||||
continue;
|
||||
|
||||
offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
|
||||
@@ -1010,7 +1033,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
|
||||
if (no_uprobe_events() || !valid_vma(vma, true))
|
||||
return 0;
|
||||
|
||||
inode = vma->vm_file->f_mapping->host;
|
||||
inode = file_inode(vma->vm_file);
|
||||
if (!inode)
|
||||
return 0;
|
||||
|
||||
@@ -1041,7 +1064,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
|
||||
struct inode *inode;
|
||||
struct rb_node *n;
|
||||
|
||||
inode = vma->vm_file->f_mapping->host;
|
||||
inode = file_inode(vma->vm_file);
|
||||
|
||||
min = vaddr_to_offset(vma, start);
|
||||
max = min + (end - start) - 1;
|
||||
@@ -1114,6 +1137,7 @@ static struct xol_area *get_xol_area(void)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct xol_area *area;
|
||||
uprobe_opcode_t insn = UPROBE_SWBP_INSN;
|
||||
|
||||
area = mm->uprobes_state.xol_area;
|
||||
if (area)
|
||||
@@ -1131,7 +1155,12 @@ static struct xol_area *get_xol_area(void)
|
||||
if (!area->page)
|
||||
goto free_bitmap;
|
||||
|
||||
/* allocate first slot of task's xol_area for the return probes */
|
||||
set_bit(0, area->bitmap);
|
||||
copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
|
||||
atomic_set(&area->slot_count, 1);
|
||||
init_waitqueue_head(&area->wq);
|
||||
|
||||
if (!xol_add_vma(area))
|
||||
return area;
|
||||
|
||||
@@ -1216,9 +1245,7 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
|
||||
static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
|
||||
{
|
||||
struct xol_area *area;
|
||||
unsigned long offset;
|
||||
unsigned long xol_vaddr;
|
||||
void *vaddr;
|
||||
|
||||
area = get_xol_area();
|
||||
if (!area)
|
||||
@@ -1229,10 +1256,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
|
||||
return 0;
|
||||
|
||||
/* Initialize the slot */
|
||||
offset = xol_vaddr & ~PAGE_MASK;
|
||||
vaddr = kmap_atomic(area->page);
|
||||
memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
|
||||
kunmap_atomic(vaddr);
|
||||
copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES);
|
||||
/*
|
||||
* We probably need flush_icache_user_range() but it needs vma.
|
||||
* This should work on supported architectures too.
|
||||
@@ -1298,6 +1322,7 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
|
||||
void uprobe_free_utask(struct task_struct *t)
|
||||
{
|
||||
struct uprobe_task *utask = t->utask;
|
||||
struct return_instance *ri, *tmp;
|
||||
|
||||
if (!utask)
|
||||
return;
|
||||
@@ -1305,6 +1330,15 @@ void uprobe_free_utask(struct task_struct *t)
|
||||
if (utask->active_uprobe)
|
||||
put_uprobe(utask->active_uprobe);
|
||||
|
||||
ri = utask->return_instances;
|
||||
while (ri) {
|
||||
tmp = ri;
|
||||
ri = ri->next;
|
||||
|
||||
put_uprobe(tmp->uprobe);
|
||||
kfree(tmp);
|
||||
}
|
||||
|
||||
xol_free_insn_slot(t);
|
||||
kfree(utask);
|
||||
t->utask = NULL;
|
||||
@@ -1333,6 +1367,93 @@ static struct uprobe_task *get_utask(void)
|
||||
return current->utask;
|
||||
}
|
||||
|
||||
/*
|
||||
* Current area->vaddr notion assume the trampoline address is always
|
||||
* equal area->vaddr.
|
||||
*
|
||||
* Returns -1 in case the xol_area is not allocated.
|
||||
*/
|
||||
static unsigned long get_trampoline_vaddr(void)
|
||||
{
|
||||
struct xol_area *area;
|
||||
unsigned long trampoline_vaddr = -1;
|
||||
|
||||
area = current->mm->uprobes_state.xol_area;
|
||||
smp_read_barrier_depends();
|
||||
if (area)
|
||||
trampoline_vaddr = area->vaddr;
|
||||
|
||||
return trampoline_vaddr;
|
||||
}
|
||||
|
||||
static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
{
|
||||
struct return_instance *ri;
|
||||
struct uprobe_task *utask;
|
||||
unsigned long orig_ret_vaddr, trampoline_vaddr;
|
||||
bool chained = false;
|
||||
|
||||
if (!get_xol_area())
|
||||
return;
|
||||
|
||||
utask = get_utask();
|
||||
if (!utask)
|
||||
return;
|
||||
|
||||
if (utask->depth >= MAX_URETPROBE_DEPTH) {
|
||||
printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
|
||||
" nestedness limit pid/tgid=%d/%d\n",
|
||||
current->pid, current->tgid);
|
||||
return;
|
||||
}
|
||||
|
||||
ri = kzalloc(sizeof(struct return_instance), GFP_KERNEL);
|
||||
if (!ri)
|
||||
goto fail;
|
||||
|
||||
trampoline_vaddr = get_trampoline_vaddr();
|
||||
orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
|
||||
if (orig_ret_vaddr == -1)
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* We don't want to keep trampoline address in stack, rather keep the
|
||||
* original return address of first caller thru all the consequent
|
||||
* instances. This also makes breakpoint unwrapping easier.
|
||||
*/
|
||||
if (orig_ret_vaddr == trampoline_vaddr) {
|
||||
if (!utask->return_instances) {
|
||||
/*
|
||||
* This situation is not possible. Likely we have an
|
||||
* attack from user-space.
|
||||
*/
|
||||
pr_warn("uprobe: unable to set uretprobe pid/tgid=%d/%d\n",
|
||||
current->pid, current->tgid);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
chained = true;
|
||||
orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
|
||||
}
|
||||
|
||||
atomic_inc(&uprobe->ref);
|
||||
ri->uprobe = uprobe;
|
||||
ri->func = instruction_pointer(regs);
|
||||
ri->orig_ret_vaddr = orig_ret_vaddr;
|
||||
ri->chained = chained;
|
||||
|
||||
utask->depth++;
|
||||
|
||||
/* add instance to the stack */
|
||||
ri->next = utask->return_instances;
|
||||
utask->return_instances = ri;
|
||||
|
||||
return;
|
||||
|
||||
fail:
|
||||
kfree(ri);
|
||||
}
|
||||
|
||||
/* Prepare to single-step probed instruction out of line. */
|
||||
static int
|
||||
pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
|
||||
@@ -1431,7 +1552,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
|
||||
clear_bit(MMF_HAS_UPROBES, &mm->flags);
|
||||
}
|
||||
|
||||
static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
|
||||
static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
|
||||
{
|
||||
struct page *page;
|
||||
uprobe_opcode_t opcode;
|
||||
@@ -1449,10 +1570,11 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
|
||||
if (result < 0)
|
||||
return result;
|
||||
|
||||
copy_opcode(page, vaddr, &opcode);
|
||||
copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
|
||||
put_page(page);
|
||||
out:
|
||||
return is_swbp_insn(&opcode);
|
||||
/* This needs to return true for any variant of the trap insn */
|
||||
return is_trap_insn(&opcode);
|
||||
}
|
||||
|
||||
static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
|
||||
@@ -1465,14 +1587,14 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
|
||||
vma = find_vma(mm, bp_vaddr);
|
||||
if (vma && vma->vm_start <= bp_vaddr) {
|
||||
if (valid_vma(vma, false)) {
|
||||
struct inode *inode = vma->vm_file->f_mapping->host;
|
||||
struct inode *inode = file_inode(vma->vm_file);
|
||||
loff_t offset = vaddr_to_offset(vma, bp_vaddr);
|
||||
|
||||
uprobe = find_uprobe(inode, offset);
|
||||
}
|
||||
|
||||
if (!uprobe)
|
||||
*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
|
||||
*is_swbp = is_trap_at_addr(mm, bp_vaddr);
|
||||
} else {
|
||||
*is_swbp = -EFAULT;
|
||||
}
|
||||
@@ -1488,16 +1610,27 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe_consumer *uc;
|
||||
int remove = UPROBE_HANDLER_REMOVE;
|
||||
bool need_prep = false; /* prepare return uprobe, when needed */
|
||||
|
||||
down_read(&uprobe->register_rwsem);
|
||||
for (uc = uprobe->consumers; uc; uc = uc->next) {
|
||||
int rc = uc->handler(uc, regs);
|
||||
int rc = 0;
|
||||
|
||||
if (uc->handler) {
|
||||
rc = uc->handler(uc, regs);
|
||||
WARN(rc & ~UPROBE_HANDLER_MASK,
|
||||
"bad rc=0x%x from %pf()\n", rc, uc->handler);
|
||||
}
|
||||
|
||||
if (uc->ret_handler)
|
||||
need_prep = true;
|
||||
|
||||
WARN(rc & ~UPROBE_HANDLER_MASK,
|
||||
"bad rc=0x%x from %pf()\n", rc, uc->handler);
|
||||
remove &= rc;
|
||||
}
|
||||
|
||||
if (need_prep && !remove)
|
||||
prepare_uretprobe(uprobe, regs); /* put bp at return */
|
||||
|
||||
if (remove && uprobe->consumers) {
|
||||
WARN_ON(!uprobe_is_active(uprobe));
|
||||
unapply_uprobe(uprobe, current->mm);
|
||||
@@ -1505,6 +1638,64 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
up_read(&uprobe->register_rwsem);
|
||||
}
|
||||
|
||||
static void
|
||||
handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe *uprobe = ri->uprobe;
|
||||
struct uprobe_consumer *uc;
|
||||
|
||||
down_read(&uprobe->register_rwsem);
|
||||
for (uc = uprobe->consumers; uc; uc = uc->next) {
|
||||
if (uc->ret_handler)
|
||||
uc->ret_handler(uc, ri->func, regs);
|
||||
}
|
||||
up_read(&uprobe->register_rwsem);
|
||||
}
|
||||
|
||||
static bool handle_trampoline(struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe_task *utask;
|
||||
struct return_instance *ri, *tmp;
|
||||
bool chained;
|
||||
|
||||
utask = current->utask;
|
||||
if (!utask)
|
||||
return false;
|
||||
|
||||
ri = utask->return_instances;
|
||||
if (!ri)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* TODO: we should throw out return_instance's invalidated by
|
||||
* longjmp(), currently we assume that the probed function always
|
||||
* returns.
|
||||
*/
|
||||
instruction_pointer_set(regs, ri->orig_ret_vaddr);
|
||||
|
||||
for (;;) {
|
||||
handle_uretprobe_chain(ri, regs);
|
||||
|
||||
chained = ri->chained;
|
||||
put_uprobe(ri->uprobe);
|
||||
|
||||
tmp = ri;
|
||||
ri = ri->next;
|
||||
kfree(tmp);
|
||||
|
||||
if (!chained)
|
||||
break;
|
||||
|
||||
utask->depth--;
|
||||
|
||||
BUG_ON(!ri);
|
||||
}
|
||||
|
||||
utask->return_instances = ri;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Run handler and ask thread to singlestep.
|
||||
* Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
|
||||
@@ -1516,8 +1707,15 @@ static void handle_swbp(struct pt_regs *regs)
|
||||
int uninitialized_var(is_swbp);
|
||||
|
||||
bp_vaddr = uprobe_get_swbp_addr(regs);
|
||||
uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
|
||||
if (bp_vaddr == get_trampoline_vaddr()) {
|
||||
if (handle_trampoline(regs))
|
||||
return;
|
||||
|
||||
pr_warn("uprobe: unable to handle uretprobe pid/tgid=%d/%d\n",
|
||||
current->pid, current->tgid);
|
||||
}
|
||||
|
||||
uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
|
||||
if (!uprobe) {
|
||||
if (is_swbp > 0) {
|
||||
/* No matching uprobe; signal SIGTRAP. */
|
||||
@@ -1616,7 +1814,11 @@ void uprobe_notify_resume(struct pt_regs *regs)
|
||||
*/
|
||||
int uprobe_pre_sstep_notifier(struct pt_regs *regs)
|
||||
{
|
||||
if (!current->mm || !test_bit(MMF_HAS_UPROBES, ¤t->mm->flags))
|
||||
if (!current->mm)
|
||||
return 0;
|
||||
|
||||
if (!test_bit(MMF_HAS_UPROBES, ¤t->mm->flags) &&
|
||||
(!current->utask || !current->utask->return_instances))
|
||||
return 0;
|
||||
|
||||
set_thread_flag(TIF_UPROBE);
|
||||
|
@@ -109,11 +109,6 @@ struct kretprobe_trace_entry_head {
|
||||
unsigned long ret_ip;
|
||||
};
|
||||
|
||||
struct uprobe_trace_entry_head {
|
||||
struct trace_entry ent;
|
||||
unsigned long ip;
|
||||
};
|
||||
|
||||
/*
|
||||
* trace_flag_type is an enumeration that holds different
|
||||
* states when a trace occurs. These are:
|
||||
|
@@ -28,6 +28,18 @@
|
||||
|
||||
#define UPROBE_EVENT_SYSTEM "uprobes"
|
||||
|
||||
struct uprobe_trace_entry_head {
|
||||
struct trace_entry ent;
|
||||
unsigned long vaddr[];
|
||||
};
|
||||
|
||||
#define SIZEOF_TRACE_ENTRY(is_return) \
|
||||
(sizeof(struct uprobe_trace_entry_head) + \
|
||||
sizeof(unsigned long) * (is_return ? 2 : 1))
|
||||
|
||||
#define DATAOF_TRACE_ENTRY(entry, is_return) \
|
||||
((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return))
|
||||
|
||||
struct trace_uprobe_filter {
|
||||
rwlock_t rwlock;
|
||||
int nr_systemwide;
|
||||
@@ -64,6 +76,8 @@ static DEFINE_MUTEX(uprobe_lock);
|
||||
static LIST_HEAD(uprobe_list);
|
||||
|
||||
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
|
||||
static int uretprobe_dispatcher(struct uprobe_consumer *con,
|
||||
unsigned long func, struct pt_regs *regs);
|
||||
|
||||
static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
|
||||
{
|
||||
@@ -77,11 +91,16 @@ static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
|
||||
return !filter->nr_systemwide && list_empty(&filter->perf_events);
|
||||
}
|
||||
|
||||
static inline bool is_ret_probe(struct trace_uprobe *tu)
|
||||
{
|
||||
return tu->consumer.ret_handler != NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate new trace_uprobe and initialize it (including uprobes).
|
||||
*/
|
||||
static struct trace_uprobe *
|
||||
alloc_trace_uprobe(const char *group, const char *event, int nargs)
|
||||
alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
|
||||
{
|
||||
struct trace_uprobe *tu;
|
||||
|
||||
@@ -106,6 +125,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
|
||||
|
||||
INIT_LIST_HEAD(&tu->list);
|
||||
tu->consumer.handler = uprobe_dispatcher;
|
||||
if (is_ret)
|
||||
tu->consumer.ret_handler = uretprobe_dispatcher;
|
||||
init_trace_uprobe_filter(&tu->filter);
|
||||
return tu;
|
||||
|
||||
@@ -180,7 +201,7 @@ end:
|
||||
|
||||
/*
|
||||
* Argument syntax:
|
||||
* - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS]
|
||||
* - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS]
|
||||
*
|
||||
* - Remove uprobe: -:[GRP/]EVENT
|
||||
*/
|
||||
@@ -192,20 +213,23 @@ static int create_trace_uprobe(int argc, char **argv)
|
||||
char buf[MAX_EVENT_NAME_LEN];
|
||||
struct path path;
|
||||
unsigned long offset;
|
||||
bool is_delete;
|
||||
bool is_delete, is_return;
|
||||
int i, ret;
|
||||
|
||||
inode = NULL;
|
||||
ret = 0;
|
||||
is_delete = false;
|
||||
is_return = false;
|
||||
event = NULL;
|
||||
group = NULL;
|
||||
|
||||
/* argc must be >= 1 */
|
||||
if (argv[0][0] == '-')
|
||||
is_delete = true;
|
||||
else if (argv[0][0] == 'r')
|
||||
is_return = true;
|
||||
else if (argv[0][0] != 'p') {
|
||||
pr_info("Probe definition must be started with 'p' or '-'.\n");
|
||||
pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -303,7 +327,7 @@ static int create_trace_uprobe(int argc, char **argv)
|
||||
kfree(tail);
|
||||
}
|
||||
|
||||
tu = alloc_trace_uprobe(group, event, argc);
|
||||
tu = alloc_trace_uprobe(group, event, argc, is_return);
|
||||
if (IS_ERR(tu)) {
|
||||
pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
|
||||
ret = PTR_ERR(tu);
|
||||
@@ -414,9 +438,10 @@ static void probes_seq_stop(struct seq_file *m, void *v)
|
||||
static int probes_seq_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct trace_uprobe *tu = v;
|
||||
char c = is_ret_probe(tu) ? 'r' : 'p';
|
||||
int i;
|
||||
|
||||
seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name);
|
||||
seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name);
|
||||
seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
|
||||
|
||||
for (i = 0; i < tu->nr_args; i++)
|
||||
@@ -485,65 +510,81 @@ static const struct file_operations uprobe_profile_ops = {
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
/* uprobe handler */
|
||||
static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
|
||||
static void uprobe_trace_print(struct trace_uprobe *tu,
|
||||
unsigned long func, struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe_trace_entry_head *entry;
|
||||
struct ring_buffer_event *event;
|
||||
struct ring_buffer *buffer;
|
||||
u8 *data;
|
||||
int size, i, pc;
|
||||
unsigned long irq_flags;
|
||||
void *data;
|
||||
int size, i;
|
||||
struct ftrace_event_call *call = &tu->call;
|
||||
|
||||
local_save_flags(irq_flags);
|
||||
pc = preempt_count();
|
||||
|
||||
size = sizeof(*entry) + tu->size;
|
||||
|
||||
size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
|
||||
event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
|
||||
size, irq_flags, pc);
|
||||
size + tu->size, 0, 0);
|
||||
if (!event)
|
||||
return 0;
|
||||
return;
|
||||
|
||||
entry = ring_buffer_event_data(event);
|
||||
entry->ip = instruction_pointer(task_pt_regs(current));
|
||||
data = (u8 *)&entry[1];
|
||||
if (is_ret_probe(tu)) {
|
||||
entry->vaddr[0] = func;
|
||||
entry->vaddr[1] = instruction_pointer(regs);
|
||||
data = DATAOF_TRACE_ENTRY(entry, true);
|
||||
} else {
|
||||
entry->vaddr[0] = instruction_pointer(regs);
|
||||
data = DATAOF_TRACE_ENTRY(entry, false);
|
||||
}
|
||||
|
||||
for (i = 0; i < tu->nr_args; i++)
|
||||
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
|
||||
|
||||
if (!filter_current_check_discard(buffer, call, entry, event))
|
||||
trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
|
||||
trace_buffer_unlock_commit(buffer, event, 0, 0);
|
||||
}
|
||||
|
||||
/* uprobe handler */
|
||||
static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
|
||||
{
|
||||
if (!is_ret_probe(tu))
|
||||
uprobe_trace_print(tu, 0, regs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
uprobe_trace_print(tu, func, regs);
|
||||
}
|
||||
|
||||
/* Event entry printers */
|
||||
static enum print_line_t
|
||||
print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
|
||||
{
|
||||
struct uprobe_trace_entry_head *field;
|
||||
struct uprobe_trace_entry_head *entry;
|
||||
struct trace_seq *s = &iter->seq;
|
||||
struct trace_uprobe *tu;
|
||||
u8 *data;
|
||||
int i;
|
||||
|
||||
field = (struct uprobe_trace_entry_head *)iter->ent;
|
||||
entry = (struct uprobe_trace_entry_head *)iter->ent;
|
||||
tu = container_of(event, struct trace_uprobe, call.event);
|
||||
|
||||
if (!trace_seq_printf(s, "%s: (", tu->call.name))
|
||||
goto partial;
|
||||
if (is_ret_probe(tu)) {
|
||||
if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name,
|
||||
entry->vaddr[1], entry->vaddr[0]))
|
||||
goto partial;
|
||||
data = DATAOF_TRACE_ENTRY(entry, true);
|
||||
} else {
|
||||
if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name,
|
||||
entry->vaddr[0]))
|
||||
goto partial;
|
||||
data = DATAOF_TRACE_ENTRY(entry, false);
|
||||
}
|
||||
|
||||
if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
|
||||
goto partial;
|
||||
|
||||
if (!trace_seq_puts(s, ")"))
|
||||
goto partial;
|
||||
|
||||
data = (u8 *)&field[1];
|
||||
for (i = 0; i < tu->nr_args; i++) {
|
||||
if (!tu->args[i].type->print(s, tu->args[i].name,
|
||||
data + tu->args[i].offset, field))
|
||||
data + tu->args[i].offset, entry))
|
||||
goto partial;
|
||||
}
|
||||
|
||||
@@ -595,16 +636,23 @@ static void probe_event_disable(struct trace_uprobe *tu, int flag)
|
||||
|
||||
static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
|
||||
{
|
||||
int ret, i;
|
||||
int ret, i, size;
|
||||
struct uprobe_trace_entry_head field;
|
||||
struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data;
|
||||
struct trace_uprobe *tu = event_call->data;
|
||||
|
||||
DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
|
||||
if (is_ret_probe(tu)) {
|
||||
DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0);
|
||||
DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0);
|
||||
size = SIZEOF_TRACE_ENTRY(true);
|
||||
} else {
|
||||
DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
|
||||
size = SIZEOF_TRACE_ENTRY(false);
|
||||
}
|
||||
/* Set argument names as fields */
|
||||
for (i = 0; i < tu->nr_args; i++) {
|
||||
ret = trace_define_field(event_call, tu->args[i].type->fmttype,
|
||||
tu->args[i].name,
|
||||
sizeof(field) + tu->args[i].offset,
|
||||
size + tu->args[i].offset,
|
||||
tu->args[i].type->size,
|
||||
tu->args[i].type->is_signed,
|
||||
FILTER_OTHER);
|
||||
@@ -622,8 +670,13 @@ static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len)
|
||||
int i;
|
||||
int pos = 0;
|
||||
|
||||
fmt = "(%lx)";
|
||||
arg = "REC->" FIELD_STRING_IP;
|
||||
if (is_ret_probe(tu)) {
|
||||
fmt = "(%lx <- %lx)";
|
||||
arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
|
||||
} else {
|
||||
fmt = "(%lx)";
|
||||
arg = "REC->" FIELD_STRING_IP;
|
||||
}
|
||||
|
||||
/* When len=0, we just calculate the needed length */
|
||||
|
||||
@@ -752,49 +805,68 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* uprobe profile handler */
|
||||
static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
|
||||
static void uprobe_perf_print(struct trace_uprobe *tu,
|
||||
unsigned long func, struct pt_regs *regs)
|
||||
{
|
||||
struct ftrace_event_call *call = &tu->call;
|
||||
struct uprobe_trace_entry_head *entry;
|
||||
struct hlist_head *head;
|
||||
u8 *data;
|
||||
int size, __size, i;
|
||||
int rctx;
|
||||
void *data;
|
||||
int size, rctx, i;
|
||||
|
||||
if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
|
||||
return UPROBE_HANDLER_REMOVE;
|
||||
|
||||
__size = sizeof(*entry) + tu->size;
|
||||
size = ALIGN(__size + sizeof(u32), sizeof(u64));
|
||||
size -= sizeof(u32);
|
||||
size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
|
||||
size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
|
||||
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
|
||||
return 0;
|
||||
return;
|
||||
|
||||
preempt_disable();
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
if (hlist_empty(head))
|
||||
goto out;
|
||||
|
||||
entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
|
||||
if (!entry)
|
||||
goto out;
|
||||
|
||||
entry->ip = instruction_pointer(task_pt_regs(current));
|
||||
data = (u8 *)&entry[1];
|
||||
if (is_ret_probe(tu)) {
|
||||
entry->vaddr[0] = func;
|
||||
entry->vaddr[1] = instruction_pointer(regs);
|
||||
data = DATAOF_TRACE_ENTRY(entry, true);
|
||||
} else {
|
||||
entry->vaddr[0] = instruction_pointer(regs);
|
||||
data = DATAOF_TRACE_ENTRY(entry, false);
|
||||
}
|
||||
|
||||
for (i = 0; i < tu->nr_args; i++)
|
||||
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
|
||||
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
|
||||
|
||||
perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
|
||||
out:
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/* uprobe profile handler */
|
||||
static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
|
||||
{
|
||||
if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
|
||||
return UPROBE_HANDLER_REMOVE;
|
||||
|
||||
if (!is_ret_probe(tu))
|
||||
uprobe_perf_print(tu, 0, regs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
uprobe_perf_print(tu, func, regs);
|
||||
}
|
||||
#endif /* CONFIG_PERF_EVENTS */
|
||||
|
||||
static
|
||||
int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
|
||||
{
|
||||
struct trace_uprobe *tu = (struct trace_uprobe *)event->data;
|
||||
struct trace_uprobe *tu = event->data;
|
||||
|
||||
switch (type) {
|
||||
case TRACE_REG_REGISTER:
|
||||
@@ -843,6 +915,23 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int uretprobe_dispatcher(struct uprobe_consumer *con,
|
||||
unsigned long func, struct pt_regs *regs)
|
||||
{
|
||||
struct trace_uprobe *tu;
|
||||
|
||||
tu = container_of(con, struct trace_uprobe, consumer);
|
||||
|
||||
if (tu->flags & TP_FLAG_TRACE)
|
||||
uretprobe_trace_func(tu, func, regs);
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
if (tu->flags & TP_FLAG_PROFILE)
|
||||
uretprobe_perf_func(tu, func, regs);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct trace_event_functions uprobe_funcs = {
|
||||
.trace = print_uprobe_event
|
||||
};
|
||||
|
@@ -517,6 +517,11 @@ int proc_dowatchdog(struct ctl_table *table, int write,
|
||||
return ret;
|
||||
|
||||
set_sample_period();
|
||||
/*
|
||||
* Watchdog threads shouldn't be enabled if they are
|
||||
* disabled. The 'watchdog_disabled' variable check in
|
||||
* watchdog_*_all_cpus() function takes care of this.
|
||||
*/
|
||||
if (watchdog_enabled && watchdog_thresh)
|
||||
watchdog_enable_all_cpus();
|
||||
else
|
||||
|
Reference in New Issue
Block a user