Merge tag 'trace-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

Pull tracing updates from Steven Rostedt:
 "The biggest change here is the updates to kprobes

  Back in January I posted patches to create function based events.
  These were the events that you suggested I make to allow developers to
  easily create events in code where no trace event exists. After
  posting those changes for review, it was suggested that we implement
  this instead with kprobes.

  The problem with kprobes is that the interface is too complex and
  needs to be simplified. Masami Hiramatsu posted patches in March and
  I've been playing with them a bit. There's been a bit of clean up in
  the kprobe code that was inspired by the function based event patches,
  and a couple of enhancements to the kprobe event interface.

   - If the arch supports it (we added support for x86), you can place a
     kprobe event at the start of a function and use $arg1, $arg2, etc
     to reference the arguments of a function. (Before you needed to
     know what register or where on the stack the argument was).

   - The second is a way to see an array of values. For example, if you
     reference a MAC address, you can add:

	echo 'p:mac ip_rcv perm_addr=+574($arg2):x8[6]' > kprobe_events

     And this will produce:

	mac: (ip_rcv+0x0/0x140) perm_addr={0x52,0x54,0x0,0xc0,0x76,0xec}

  Other changes include

   - Exporting trace_dump_stack to modules

   - Have the stack tracer trace the entire stack (stop trying to remove
     tracing itself, as we keep removing too much).

   - Added support for SDT in uprobes"

[ SDT - "Statically Defined Tracing" are userspace markers for tracing.
  Let's not use random TLA's in explanations unless they are fairly
  well-established as generic (at least for kernel people) - Linus ]

* tag 'trace-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (24 commits)
  tracing: Have stack tracer trace full stack
  tracing: Export trace_dump_stack to modules
  tracing: probeevent: Fix uninitialized used of offset in parse args
  tracing/kprobes: Allow kprobe-events to record module symbol
  tracing/kprobes: Check the probe on unloaded module correctly
  tracing/uprobes: Fix to return -EFAULT if copy_from_user failed
  tracing: probeevent: Add $argN for accessing function args
  x86: ptrace: Add function argument access API
  tracing: probeevent: Add array type support
  tracing: probeevent: Add symbol type
  tracing: probeevent: Unify fetch_insn processing common part
  tracing: probeevent: Append traceprobe_ for exported function
  tracing: probeevent: Return consumed bytes of dynamic area
  tracing: probeevent: Unify fetch type tables
  tracing: probeevent: Introduce new argument fetching code
  tracing: probeevent: Remove NOKPROBE_SYMBOL from print functions
  tracing: probeevent: Cleanup argument field definition
  tracing: probeevent: Cleanup print argument functions
  trace_uprobe: support reference counter in fd-based uprobe
  perf probe: Support SDT markers having reference counter (semaphore)
  ...
This commit is contained in:
Linus Torvalds
2018-10-30 09:49:56 -07:00
22 changed files with 1438 additions and 957 deletions

View File

@@ -8376,30 +8376,39 @@ static struct pmu perf_tracepoint = {
*
* PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe
* if not set, create kprobe/uprobe
*
* The following values specify a reference counter (or semaphore in the
* terminology of tools like dtrace, systemtap, etc.) for Userspace Statically
* Defined Tracepoints (USDT). Currently, we use 32 bits for the offset.
*
* PERF_UPROBE_REF_CTR_OFFSET_BITS # of bits in config used as the offset
* PERF_UPROBE_REF_CTR_OFFSET_SHIFT # of bits to shift the offset left within config
*/
/*
 * Layout of perf_event_attr.config for the kprobe/uprobe PMUs.
 */
enum perf_probe_config {
	/* Bit 0: request a [k,u]retprobe rather than a [k,u]probe. */
	PERF_PROBE_CONFIG_IS_RETPROBE = (1U << 0),

	/* Width, in bits, of the reference counter offset field. */
	PERF_UPROBE_REF_CTR_OFFSET_BITS = 32,

	/* The offset field occupies the topmost bits of the 64-bit config. */
	PERF_UPROBE_REF_CTR_OFFSET_SHIFT =
		(64 - PERF_UPROBE_REF_CTR_OFFSET_BITS),
};
PMU_FORMAT_ATTR(retprobe, "config:0");
#endif
static struct attribute *probe_attrs[] = {
#ifdef CONFIG_KPROBE_EVENTS
static struct attribute *kprobe_attrs[] = {
&format_attr_retprobe.attr,
NULL,
};
static struct attribute_group probe_format_group = {
static struct attribute_group kprobe_format_group = {
.name = "format",
.attrs = probe_attrs,
.attrs = kprobe_attrs,
};
static const struct attribute_group *probe_attr_groups[] = {
&probe_format_group,
static const struct attribute_group *kprobe_attr_groups[] = {
&kprobe_format_group,
NULL,
};
#endif
#ifdef CONFIG_KPROBE_EVENTS
static int perf_kprobe_event_init(struct perf_event *event);
static struct pmu perf_kprobe = {
.task_ctx_nr = perf_sw_context,
@@ -8409,7 +8418,7 @@ static struct pmu perf_kprobe = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
.attr_groups = probe_attr_groups,
.attr_groups = kprobe_attr_groups,
};
static int perf_kprobe_event_init(struct perf_event *event)
@@ -8441,6 +8450,24 @@ static int perf_kprobe_event_init(struct perf_event *event)
#endif /* CONFIG_KPROBE_EVENTS */
#ifdef CONFIG_UPROBE_EVENTS
/*
 * Format attribute "ref_ctr_offset", described as bits 32-63 of config.
 * This matches perf_uprobe_event_init(), which extracts the offset with
 * attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT.
 */
PMU_FORMAT_ATTR(ref_ctr_offset, "config:32-63");
/* Format attributes of the uprobe PMU: retprobe flag plus ref_ctr_offset. */
static struct attribute *uprobe_attrs[] = {
&format_attr_retprobe.attr,
&format_attr_ref_ctr_offset.attr,
NULL,
};
/* Attribute group named "format" that carries uprobe_attrs. */
static struct attribute_group uprobe_format_group = {
.name = "format",
.attrs = uprobe_attrs,
};
/* NULL-terminated group list referenced by perf_uprobe.attr_groups. */
static const struct attribute_group *uprobe_attr_groups[] = {
&uprobe_format_group,
NULL,
};
static int perf_uprobe_event_init(struct perf_event *event);
static struct pmu perf_uprobe = {
.task_ctx_nr = perf_sw_context,
@@ -8450,12 +8477,13 @@ static struct pmu perf_uprobe = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
.attr_groups = probe_attr_groups,
.attr_groups = uprobe_attr_groups,
};
static int perf_uprobe_event_init(struct perf_event *event)
{
int err;
unsigned long ref_ctr_offset;
bool is_retprobe;
if (event->attr.type != perf_uprobe.type)
@@ -8471,7 +8499,8 @@ static int perf_uprobe_event_init(struct perf_event *event)
return -EOPNOTSUPP;
is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
err = perf_uprobe_init(event, is_retprobe);
ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe);
if (err)
return err;

View File

@@ -73,6 +73,7 @@ struct uprobe {
struct uprobe_consumer *consumers;
struct inode *inode; /* Also hold a ref to inode */
loff_t offset;
loff_t ref_ctr_offset;
unsigned long flags;
/*
@@ -88,6 +89,15 @@ struct uprobe {
struct arch_uprobe arch;
};
/*
 * One pending reference counter increment: @uprobe's counter could not be
 * bumped in @mm yet (update_ref_ctr() queues an entry when no vma holding
 * the counter is found), so the pair is remembered until
 * delayed_ref_ctr_inc() sees a matching vma.
 */
struct delayed_uprobe {
struct list_head list; /* link in delayed_uprobe_list */
struct uprobe *uprobe; /* probe whose counter is pending */
struct mm_struct *mm; /* address space the increment is pending for */
};
/*
 * Taken around delayed_uprobe_list accesses in update_ref_ctr(),
 * delayed_ref_ctr_inc() and uprobe_clear_state().
 */
static DEFINE_MUTEX(delayed_uprobe_lock);
/* All pending (uprobe, mm) pairs. */
static LIST_HEAD(delayed_uprobe_list);
/*
* Execute out of line area: anonymous executable mapping installed
* by the probed task to execute the copy of the original instruction
@@ -282,6 +292,166 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
return 1;
}
static struct delayed_uprobe *
delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
{
struct delayed_uprobe *du;
list_for_each_entry(du, &delayed_uprobe_list, list)
if (du->uprobe == uprobe && du->mm == mm)
return du;
return NULL;
}
/*
 * Queue the (@uprobe, @mm) pair on delayed_uprobe_list unless it is
 * already there.
 *
 * Returns 0 when the pair is queued (newly or previously), -ENOMEM when
 * the list entry cannot be allocated.
 */
static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
{
	struct delayed_uprobe *entry;

	/* A pair is queued at most once. */
	if (delayed_uprobe_check(uprobe, mm))
		return 0;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->mm = mm;
	entry->uprobe = uprobe;
	list_add(&entry->list, &delayed_uprobe_list);

	return 0;
}
/*
 * Unlink @du from delayed_uprobe_list and free it. A NULL @du triggers
 * WARN_ON() and is otherwise ignored.
 */
static void delayed_uprobe_delete(struct delayed_uprobe *du)
{
	if (!WARN_ON(!du)) {
		list_del(&du->list);
		kfree(du);
	}
}
static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
{
struct list_head *pos, *q;
struct delayed_uprobe *du;
if (!uprobe && !mm)
return;
list_for_each_safe(pos, q, &delayed_uprobe_list) {
du = list_entry(pos, struct delayed_uprobe, list);
if (uprobe && du->uprobe != uprobe)
continue;
if (mm && du->mm != mm)
continue;
delayed_uprobe_delete(du);
}
}
static bool valid_ref_ctr_vma(struct uprobe *uprobe,
struct vm_area_struct *vma)
{
unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset);
return uprobe->ref_ctr_offset &&
vma->vm_file &&
file_inode(vma->vm_file) == uprobe->inode &&
(vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
vma->vm_start <= vaddr &&
vma->vm_end > vaddr;
}
/*
 * Walk @mm's vma list and return the first vma accepted by
 * valid_ref_ctr_vma() for @uprobe, or NULL when there is none.
 */
static struct vm_area_struct *
find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm)
{
	struct vm_area_struct *vma = mm->mmap;

	while (vma && !valid_ref_ctr_vma(uprobe, vma))
		vma = vma->vm_next;

	return vma;
}
/*
 * Add @d to the short reference counter located at @vaddr in @mm.
 *
 * Returns 0 on success. Returns -EINVAL when @vaddr or @d is zero, when
 * @vaddr is not aligned for a short, or when the update would make the
 * counter negative (the counter is left untouched in that case). Returns
 * -EBUSY when get_user_pages_remote() hands back no page, or its error
 * code when it fails.
 */
static int
__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
{
	void *kaddr;
	struct page *page;
	struct vm_area_struct *vma;
	int ret;
	short *ptr;

	if (!vaddr || !d)
		return -EINVAL;

	/*
	 * Only the single page containing @vaddr is mapped below and the
	 * counter is accessed as a short. A misaligned address on the last
	 * byte of a page would make that access run past the mapped page,
	 * so refuse misaligned counters outright.
	 */
	if (vaddr & (sizeof(short) - 1))
		return -EINVAL;

	ret = get_user_pages_remote(NULL, mm, vaddr, 1,
				    FOLL_WRITE, &page, &vma, NULL);
	if (unlikely(ret <= 0)) {
		/*
		 * We are asking for 1 page. If get_user_pages_remote() fails,
		 * it may return 0, in that case we have to return error.
		 */
		return ret == 0 ? -EBUSY : ret;
	}

	kaddr = kmap_atomic(page);
	/* Offset of the counter within its page. */
	ptr = kaddr + (vaddr & ~PAGE_MASK);

	if (unlikely(*ptr + d < 0)) {
		pr_warn("ref_ctr going negative. vaddr: 0x%lx, "
			"curr val: %d, delta: %d\n", vaddr, *ptr, d);
		ret = -EINVAL;
		goto out;
	}

	*ptr += d;
	ret = 0;
out:
	kunmap_atomic(kaddr);
	put_page(page);
	return ret;
}
/*
 * Log a warning that changing @uprobe's reference counter in @mm failed.
 * The sign of @d selects the wording: positive means "increment",
 * anything else "decrement".
 */
static void update_ref_ctr_warn(struct uprobe *uprobe,
				struct mm_struct *mm, short d)
{
	const char *op = "decrement";
	unsigned long long probe_off = uprobe->offset;
	unsigned long long ctr_off = uprobe->ref_ctr_offset;

	if (d > 0)
		op = "increment";

	pr_warn("ref_ctr %s failed for inode: 0x%lx offset: "
		"0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n",
		op, uprobe->inode->i_ino, probe_off, ctr_off, mm);
}
static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
short d)
{
struct vm_area_struct *rc_vma;
unsigned long rc_vaddr;
int ret = 0;
rc_vma = find_ref_ctr_vma(uprobe, mm);
if (rc_vma) {
rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset);
ret = __update_ref_ctr(mm, rc_vaddr, d);
if (ret)
update_ref_ctr_warn(uprobe, mm, d);
if (d > 0)
return ret;
}
mutex_lock(&delayed_uprobe_lock);
if (d > 0)
ret = delayed_uprobe_add(uprobe, mm);
else
delayed_uprobe_remove(uprobe, mm);
mutex_unlock(&delayed_uprobe_lock);
return ret;
}
/*
* NOTE:
* Expect the breakpoint instruction to be the smallest size instruction for
@@ -302,9 +472,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long vaddr, uprobe_opcode_t opcode)
{
struct uprobe *uprobe;
struct page *old_page, *new_page;
struct vm_area_struct *vma;
int ret;
int ret, is_register, ref_ctr_updated = 0;
is_register = is_swbp_insn(&opcode);
uprobe = container_of(auprobe, struct uprobe, arch);
retry:
/* Read the page with vaddr into memory */
@@ -317,6 +491,15 @@ retry:
if (ret <= 0)
goto put_old;
/* We are going to replace instruction, update ref_ctr. */
if (!ref_ctr_updated && uprobe->ref_ctr_offset) {
ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1);
if (ret)
goto put_old;
ref_ctr_updated = 1;
}
ret = anon_vma_prepare(vma);
if (ret)
goto put_old;
@@ -337,6 +520,11 @@ put_old:
if (unlikely(ret == -EAGAIN))
goto retry;
/* Revert back reference counter if instruction update failed. */
if (ret && is_register && ref_ctr_updated)
update_ref_ctr(uprobe, mm, -1);
return ret;
}
@@ -378,8 +566,15 @@ static struct uprobe *get_uprobe(struct uprobe *uprobe)
static void put_uprobe(struct uprobe *uprobe)
{
if (atomic_dec_and_test(&uprobe->ref))
if (atomic_dec_and_test(&uprobe->ref)) {
/*
* If application munmap(exec_vma) before uprobe_unregister()
* gets called, we don't get a chance to remove uprobe from
* delayed_uprobe_list from remove_breakpoint(). Do it here.
*/
delayed_uprobe_remove(uprobe, NULL);
kfree(uprobe);
}
}
static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -484,7 +679,18 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
return u;
}
static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
/*
 * Log that a uprobe already exists for this inode:offset but with a
 * different reference counter offset. @cur_uprobe is the existing probe
 * ("old"), @uprobe the one being registered ("new").
 */
static void
ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe)
{
	unsigned long long probe_off = uprobe->offset;
	unsigned long long old_ctr_off = cur_uprobe->ref_ctr_offset;
	unsigned long long new_ctr_off = uprobe->ref_ctr_offset;

	pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx "
		"ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n",
		uprobe->inode->i_ino, probe_off, old_ctr_off, new_ctr_off);
}
static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
loff_t ref_ctr_offset)
{
struct uprobe *uprobe, *cur_uprobe;
@@ -494,6 +700,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
uprobe->inode = inode;
uprobe->offset = offset;
uprobe->ref_ctr_offset = ref_ctr_offset;
init_rwsem(&uprobe->register_rwsem);
init_rwsem(&uprobe->consumer_rwsem);
@@ -501,6 +708,12 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
cur_uprobe = insert_uprobe(uprobe);
/* a uprobe exists for this inode:offset combination */
if (cur_uprobe) {
if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) {
ref_ctr_mismatch_warn(cur_uprobe, uprobe);
put_uprobe(cur_uprobe);
kfree(uprobe);
return ERR_PTR(-EINVAL);
}
kfree(uprobe);
uprobe = cur_uprobe;
}
@@ -895,7 +1108,7 @@ EXPORT_SYMBOL_GPL(uprobe_unregister);
* else return 0 (success)
*/
static int __uprobe_register(struct inode *inode, loff_t offset,
struct uprobe_consumer *uc)
loff_t ref_ctr_offset, struct uprobe_consumer *uc)
{
struct uprobe *uprobe;
int ret;
@@ -912,9 +1125,12 @@ static int __uprobe_register(struct inode *inode, loff_t offset,
return -EINVAL;
retry:
uprobe = alloc_uprobe(inode, offset);
uprobe = alloc_uprobe(inode, offset, ref_ctr_offset);
if (!uprobe)
return -ENOMEM;
if (IS_ERR(uprobe))
return PTR_ERR(uprobe);
/*
* We can race with uprobe_unregister()->delete_uprobe().
* Check uprobe_is_active() and retry if it is false.
@@ -938,10 +1154,17 @@ static int __uprobe_register(struct inode *inode, loff_t offset,
/*
 * Register consumer @uc for the probe at @offset in @inode, without a
 * reference counter: a ref_ctr_offset of 0 is passed down, which the
 * reference counter code treats as "no counter".
 *
 * Returns whatever __uprobe_register() returns.
 */
int uprobe_register(struct inode *inode, loff_t offset,
		    struct uprobe_consumer *uc)
{
	return __uprobe_register(inode, offset, 0, uc);
}
EXPORT_SYMBOL_GPL(uprobe_register);
/*
 * Same as uprobe_register(), but additionally associates the probe with a
 * reference counter located at @ref_ctr_offset. The arguments are passed
 * straight through to __uprobe_register(), whose return value is returned
 * unchanged.
 */
int uprobe_register_refctr(struct inode *inode, loff_t offset,
loff_t ref_ctr_offset, struct uprobe_consumer *uc)
{
return __uprobe_register(inode, offset, ref_ctr_offset, uc);
}
EXPORT_SYMBOL_GPL(uprobe_register_refctr);
/*
* uprobe_apply - unregister an already registered probe.
* @inode: the file in which the probe has to be removed.
@@ -1060,6 +1283,35 @@ static void build_probe_list(struct inode *inode,
spin_unlock(&uprobes_treelock);
}
/* @vma contains reference counter, not the probed instruction. */
static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
{
struct list_head *pos, *q;
struct delayed_uprobe *du;
unsigned long vaddr;
int ret = 0, err = 0;
mutex_lock(&delayed_uprobe_lock);
list_for_each_safe(pos, q, &delayed_uprobe_list) {
du = list_entry(pos, struct delayed_uprobe, list);
if (du->mm != vma->vm_mm ||
!valid_ref_ctr_vma(du->uprobe, vma))
continue;
vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset);
ret = __update_ref_ctr(vma->vm_mm, vaddr, 1);
if (ret) {
update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1);
if (!err)
err = ret;
}
delayed_uprobe_delete(du);
}
mutex_unlock(&delayed_uprobe_lock);
return err;
}
/*
* Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
*
@@ -1072,7 +1324,15 @@ int uprobe_mmap(struct vm_area_struct *vma)
struct uprobe *uprobe, *u;
struct inode *inode;
if (no_uprobe_events() || !valid_vma(vma, true))
if (no_uprobe_events())
return 0;
if (vma->vm_file &&
(vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags))
delayed_ref_ctr_inc(vma);
if (!valid_vma(vma, true))
return 0;
inode = file_inode(vma->vm_file);
@@ -1246,6 +1506,10 @@ void uprobe_clear_state(struct mm_struct *mm)
{
struct xol_area *area = mm->uprobes_state.xol_area;
mutex_lock(&delayed_uprobe_lock);
delayed_uprobe_remove(NULL, mm);
mutex_unlock(&delayed_uprobe_lock);
if (!area)
return;