@@ -1941,6 +1941,11 @@ static void perf_put_aux_event(struct perf_event *event)
 	}
 }
 
+static bool perf_need_aux_event(struct perf_event *event)
+{
+	return !!event->attr.aux_output || !!event->attr.aux_sample_size;
+}
+
 static int perf_get_aux_event(struct perf_event *event,
 			      struct perf_event *group_leader)
 {
@@ -1953,7 +1958,17 @@ static int perf_get_aux_event(struct perf_event *event,
 	if (!group_leader)
 		return 0;
 
-	if (!perf_aux_output_match(event, group_leader))
+	/*
+	 * aux_output and aux_sample_size are mutually exclusive.
+	 */
+	if (event->attr.aux_output && event->attr.aux_sample_size)
+		return 0;
+
+	if (event->attr.aux_output &&
+	    !perf_aux_output_match(event, group_leader))
+		return 0;
+
+	if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux)
 		return 0;
 
 	if (!atomic_long_inc_not_zero(&group_leader->refcount))
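
The two hunks above extend perf_get_aux_event() for the new attr.aux_sample_size path: an event may either redirect its output into the group leader's AUX buffer (aux_output) or pull a snapshot of that buffer into its own samples (aux_sample_size), but not both, and the latter also requires the leader's PMU to implement ->snapshot_aux(). As a rough userspace illustration only (not part of the patch; the AUX PMU type value would come from sysfs and is an assumption here), AUX sampling could be requested along these lines:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

/* Sketch: group a cycles event under an AUX-capable leader (e.g. Intel PT)
 * and ask for up to 4kB of AUX data to be snapshotted into each sample. */
static int open_aux_sampling(int aux_pmu_type)
{
	struct perf_event_attr lead, samp;
	int lead_fd;

	memset(&lead, 0, sizeof(lead));
	lead.size = sizeof(lead);
	lead.type = aux_pmu_type;		/* AUX-producing PMU */

	memset(&samp, 0, sizeof(samp));
	samp.size = sizeof(samp);
	samp.type = PERF_TYPE_HARDWARE;
	samp.config = PERF_COUNT_HW_CPU_CYCLES;
	samp.sample_period = 100000;
	samp.sample_type = PERF_SAMPLE_AUX;	/* new sample type in this series */
	samp.aux_sample_size = 4096;		/* snapshot size per sample */

	lead_fd = syscall(__NR_perf_event_open, &lead, 0, -1, -1, 0);
	if (lead_fd < 0)
		return -1;

	/* grouping under the AUX leader is what perf_get_aux_event() validates */
	return syscall(__NR_perf_event_open, &samp, 0, -1, lead_fd, 0);
}
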
@@ -2666,6 +2681,25 @@ perf_install_in_context(struct perf_event_context *ctx,
 	 */
 	smp_store_release(&event->ctx, ctx);
 
+	/*
+	 * perf_event_attr::disabled events will not run and can be initialized
+	 * without IPI. Except when this is the first event for the context, in
+	 * that case we need the magic of the IPI to set ctx->is_active.
+	 *
+	 * The IOC_ENABLE that is sure to follow the creation of a disabled
+	 * event will issue the IPI and reprogram the hardware.
+	 */
+	if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
+		raw_spin_lock_irq(&ctx->lock);
+		if (ctx->task == TASK_TOMBSTONE) {
+			raw_spin_unlock_irq(&ctx->lock);
+			return;
+		}
+		add_event_to_ctx(event, ctx);
+		raw_spin_unlock_irq(&ctx->lock);
+		return;
+	}
+
 	if (!task) {
 		cpu_function_call(cpu, __perf_install_in_context, event);
 		return;
@@ -3204,10 +3238,21 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		raw_spin_lock(&ctx->lock);
 		raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
 		if (context_equiv(ctx, next_ctx)) {
+			struct pmu *pmu = ctx->pmu;
+
 			WRITE_ONCE(ctx->task, next);
 			WRITE_ONCE(next_ctx->task, task);
 
-			swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
+			/*
+			 * PMU specific parts of task perf context can require
+			 * additional synchronization. As an example of such
+			 * synchronization see implementation details of Intel
+			 * LBR call stack data profiling;
+			 */
+			if (pmu->swap_task_ctx)
+				pmu->swap_task_ctx(ctx, next_ctx);
+			else
+				swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
 
 			/*
 			 * RCU_INIT_POINTER here is safe because we've not
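
The new ->swap_task_ctx() hook lets a PMU do more than the generic pointer swap when two equivalent task contexts are exchanged at context-switch time (Intel LBR call-stack profiling is the in-tree user). Purely to illustrate the shape of such a callback (not part of this patch; the toy_* name is invented and a real driver would also migrate its internal reference counts), it could look roughly like this:

#include <linux/perf_event.h>

/* Hypothetical ->swap_task_ctx() implementation: exchange the PMU-specific
 * task_ctx_data buffers and perform any driver bookkeeping that the plain
 * swap() in the generic code cannot know about. */
static void toy_pmu_swap_task_ctx(struct perf_event_context *prev,
				  struct perf_event_context *next)
{
	void *tmp = prev->task_ctx_data;

	/* driver-specific fixups (e.g. moving internal refcounts) go here */

	prev->task_ctx_data = next->task_ctx_data;
	next->task_ctx_data = tmp;
}
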
@@ -4229,8 +4274,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
 
 	if (!task) {
 		/* Must be root to operate on a CPU event: */
-		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return ERR_PTR(-EACCES);
+		err = perf_allow_cpu(&event->attr);
+		if (err)
+			return ERR_PTR(err);
 
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
@@ -4539,6 +4585,8 @@ static void _free_event(struct perf_event *event)
 
 	unaccount_event(event);
 
+	security_perf_event_free(event);
+
 	if (event->rb) {
 		/*
 		 * Can happen when we close an event with re-directed output.
@@ -4992,6 +5040,10 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	struct perf_event_context *ctx;
 	int ret;
 
+	ret = security_perf_event_read(event);
+	if (ret)
+		return ret;
+
 	ctx = perf_event_ctx_lock(event);
 	ret = __perf_read(event, buf, count);
 	perf_event_ctx_unlock(event, ctx);
@@ -5288,6 +5340,11 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct perf_event_context *ctx;
 	long ret;
 
+	/* Treat ioctl like writes as it is likely a mutating operation. */
+	ret = security_perf_event_write(event);
+	if (ret)
+		return ret;
+
 	ctx = perf_event_ctx_lock(event);
 	ret = _perf_ioctl(event, cmd, arg);
 	perf_event_ctx_unlock(event, ctx);
@@ -5639,10 +5696,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 		perf_pmu_output_stop(event);
 
 		/* now it's safe to free the pages */
-		if (!rb->aux_mmap_locked)
-			atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
-		else
-			atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
+		atomic_long_sub(rb->aux_nr_pages - rb->aux_mmap_locked, &mmap_user->locked_vm);
+		atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
 
 		/* this has to be the last one */
 		rb_free_aux(rb);
@@ -5753,6 +5808,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
+	ret = security_perf_event_read(event);
+	if (ret)
+		return ret;
+
 	vma_size = vma->vm_end - vma->vm_start;
 
 	if (vma->vm_pgoff == 0) {
@@ -5859,13 +5918,7 @@ accounting:
 
 	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
 
-	if (user_locked <= user_lock_limit) {
-		/* charge all to locked_vm */
-	} else if (atomic_long_read(&user->locked_vm) >= user_lock_limit) {
-		/* charge all to pinned_vm */
-		extra = user_extra;
-		user_extra = 0;
-	} else {
+	if (user_locked > user_lock_limit) {
 		/*
 		 * charge locked_vm until it hits user_lock_limit;
 		 * charge the rest from pinned_vm
@@ -5878,7 +5931,7 @@ accounting:
 	lock_limit >>= PAGE_SHIFT;
 	locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
 
-	if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
+	if ((locked > lock_limit) && perf_is_paranoid() &&
 		!capable(CAP_IPC_LOCK)) {
 		ret = -EPERM;
 		goto unlock;
@@ -6208,6 +6261,122 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
 	}
 }
 
+static unsigned long perf_prepare_sample_aux(struct perf_event *event,
+					     struct perf_sample_data *data,
+					     size_t size)
+{
+	struct perf_event *sampler = event->aux_event;
+	struct ring_buffer *rb;
+
+	data->aux_size = 0;
+
+	if (!sampler)
+		goto out;
+
+	if (WARN_ON_ONCE(READ_ONCE(sampler->state) != PERF_EVENT_STATE_ACTIVE))
+		goto out;
+
+	if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id()))
+		goto out;
+
+	rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
+	if (!rb)
+		goto out;
+
+	/*
+	 * If this is an NMI hit inside sampling code, don't take
+	 * the sample. See also perf_aux_sample_output().
+	 */
+	if (READ_ONCE(rb->aux_in_sampling)) {
+		data->aux_size = 0;
+	} else {
+		size = min_t(size_t, size, perf_aux_size(rb));
+		data->aux_size = ALIGN(size, sizeof(u64));
+	}
+	ring_buffer_put(rb);
+
+out:
+	return data->aux_size;
+}
+
+long perf_pmu_snapshot_aux(struct ring_buffer *rb,
+			   struct perf_event *event,
+			   struct perf_output_handle *handle,
+			   unsigned long size)
+{
+	unsigned long flags;
+	long ret;
+
+	/*
+	 * Normal ->start()/->stop() callbacks run in IRQ mode in scheduler
+	 * paths. If we start calling them in NMI context, they may race with
+	 * the IRQ ones, that is, for example, re-starting an event that's just
+	 * been stopped, which is why we're using a separate callback that
+	 * doesn't change the event state.
+	 *
+	 * IRQs need to be disabled to prevent IPIs from racing with us.
+	 */
+	local_irq_save(flags);
+	/*
+	 * Guard against NMI hits inside the critical section;
+	 * see also perf_prepare_sample_aux().
+	 */
+	WRITE_ONCE(rb->aux_in_sampling, 1);
+	barrier();
+
+	ret = event->pmu->snapshot_aux(event, handle, size);
+
+	barrier();
+	WRITE_ONCE(rb->aux_in_sampling, 0);
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+static void perf_aux_sample_output(struct perf_event *event,
+				   struct perf_output_handle *handle,
+				   struct perf_sample_data *data)
+{
+	struct perf_event *sampler = event->aux_event;
+	unsigned long pad;
+	struct ring_buffer *rb;
+	long size;
+
+	if (WARN_ON_ONCE(!sampler || !data->aux_size))
+		return;
+
+	rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
+	if (!rb)
+		return;
+
+	size = perf_pmu_snapshot_aux(rb, sampler, handle, data->aux_size);
+
+	/*
+	 * An error here means that perf_output_copy() failed (returned a
+	 * non-zero surplus that it didn't copy), which in its current
+	 * enlightened implementation is not possible. If that changes, we'd
+	 * like to know.
+	 */
+	if (WARN_ON_ONCE(size < 0))
+		goto out_put;
+
+	/*
+	 * The pad comes from ALIGN()ing data->aux_size up to u64 in
+	 * perf_prepare_sample_aux(), so should not be more than that.
+	 */
+	pad = data->aux_size - size;
+	if (WARN_ON_ONCE(pad >= sizeof(u64)))
+		pad = 8;
+
+	if (pad) {
+		u64 zero = 0;
+
+		perf_output_copy(handle, &zero, pad);
+	}
+
+out_put:
+	ring_buffer_put(rb);
+}
+
 static void __perf_event_header__init_id(struct perf_event_header *header,
 					 struct perf_sample_data *data,
 					 struct perf_event *event)
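
perf_pmu_snapshot_aux() above only provides the NMI guard and IRQ protection; the actual copy is delegated to the PMU driver's new ->snapshot_aux() callback, which must not change the event state because it may run in NMI context. Purely for illustration (not part of the patch, and much simpler than a real implementation such as Intel PT's), a driver that tracked its AUX data in a linear per-CPU buffer might provide something like the following; the toy_* names and bookkeeping structure are invented:

#include <linux/perf_event.h>
#include <linux/percpu.h>

/* Hypothetical per-CPU bookkeeping a toy driver might keep for its AUX area. */
struct toy_aux_buf {
	void		*base;	/* start of the AUX area */
	unsigned long	head;	/* bytes of valid data written so far */
};

static DEFINE_PER_CPU(struct toy_aux_buf, toy_aux);

/* Candidate for struct pmu::snapshot_aux: copy the most recent 'size' bytes
 * of AUX data into the sample without starting or stopping the event. */
static long toy_pmu_snapshot_aux(struct perf_event *event,
				 struct perf_output_handle *handle,
				 unsigned long size)
{
	struct toy_aux_buf *buf = this_cpu_ptr(&toy_aux);
	unsigned long from;

	if (!buf->base)
		return 0;

	if (size > buf->head)	/* not enough data yet: copy what exists */
		size = buf->head;

	from = buf->head - size;
	perf_output_copy(handle, buf->base + from, size);

	return size;
}
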
@@ -6527,6 +6696,13 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
 		perf_output_put(handle, data->phys_addr);
 
+	if (sample_type & PERF_SAMPLE_AUX) {
+		perf_output_put(handle, data->aux_size);
+
+		if (data->aux_size)
+			perf_aux_sample_output(event, handle, data);
+	}
+
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
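
For consumers of the resulting stream: when PERF_SAMPLE_AUX is set, the tail of each sample record is the u64 size emitted above, immediately followed by that many bytes of AUX data, zero-padded up to the next u64 boundary by perf_aux_sample_output(). A purely illustrative C view of that tail (this struct is not defined by the patch or the UAPI):

#include <linux/types.h>

/* Illustrative only: layout of the PERF_SAMPLE_AUX portion of a sample. */
struct sample_aux_tail {
	__u64	aux_size;	/* perf_output_put(handle, data->aux_size) */
	__u8	aux_data[];	/* aux_size bytes from the leader's AUX buffer */
};
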
@@ -6715,6 +6891,35 @@ void perf_prepare_sample(struct perf_event_header *header,
 
 	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
 		data->phys_addr = perf_virt_to_phys(data->addr);
+
+	if (sample_type & PERF_SAMPLE_AUX) {
+		u64 size;
+
+		header->size += sizeof(u64); /* size */
+
+		/*
+		 * Given the 16bit nature of header::size, an AUX sample can
+		 * easily overflow it, what with all the preceding sample bits.
+		 * Make sure this doesn't happen by using up to U16_MAX bytes
+		 * per sample in total (rounded down to 8 byte boundary).
+		 */
+		size = min_t(size_t, U16_MAX - header->size,
+			     event->attr.aux_sample_size);
+		size = rounddown(size, 8);
+		size = perf_prepare_sample_aux(event, data, size);
+
+		WARN_ON_ONCE(size + header->size > U16_MAX);
+		header->size += size;
+	}
+	/*
+	 * If you're adding more sample types here, you likely need to do
+	 * something about the overflowing header::size, like repurpose the
+	 * lowest 3 bits of size, which should be always zero at the moment.
+	 * This raises a more important question, do we really need 512k sized
+	 * samples and why, so good argumentation is in order for whatever you
+	 * do here next.
+	 */
+	WARN_ON_ONCE(header->size & 7);
 }
 
 static __always_inline int
@@ -10066,7 +10271,7 @@ static struct lock_class_key cpuctx_lock;
 
 int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 {
-	int cpu, ret;
+	int cpu, ret, max = PERF_TYPE_MAX;
 
 	mutex_lock(&pmus_lock);
 	ret = -ENOMEM;
@@ -10079,12 +10284,17 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 		goto skip_type;
 	pmu->name = name;
 
-	if (type < 0) {
-		type = idr_alloc(&pmu_idr, pmu, PERF_TYPE_MAX, 0, GFP_KERNEL);
-		if (type < 0) {
-			ret = type;
+	if (type != PERF_TYPE_SOFTWARE) {
+		if (type >= 0)
+			max = type;
+
+		ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
+		if (ret < 0)
 			goto free_pdc;
-		}
+
+		WARN_ON(type >= 0 && ret != type);
+
+		type = ret;
 	}
 	pmu->type = type;
 
@@ -10161,7 +10371,16 @@ got_cpu_context:
 	if (!pmu->event_idx)
 		pmu->event_idx = perf_event_idx_default;
 
-	list_add_rcu(&pmu->entry, &pmus);
+	/*
+	 * Ensure the TYPE_SOFTWARE PMUs are at the head of the list,
+	 * since these cannot be in the IDR. This way the linear search
+	 * is fast, provided a valid software event is provided.
+	 */
+	if (type == PERF_TYPE_SOFTWARE || !name)
+		list_add_rcu(&pmu->entry, &pmus);
+	else
+		list_add_tail_rcu(&pmu->entry, &pmus);
+
 	atomic_set(&pmu->exclusive_cnt, 0);
 	ret = 0;
 unlock:
@@ -10174,7 +10393,7 @@ free_dev:
 	put_device(pmu->dev);
 
 free_idr:
-	if (pmu->type >= PERF_TYPE_MAX)
+	if (pmu->type != PERF_TYPE_SOFTWARE)
 		idr_remove(&pmu_idr, pmu->type);
 
 free_pdc:
@@ -10196,7 +10415,7 @@ void perf_pmu_unregister(struct pmu *pmu)
 	synchronize_rcu();
 
 	free_percpu(pmu->pmu_disable_count);
-	if (pmu->type >= PERF_TYPE_MAX)
+	if (pmu->type != PERF_TYPE_SOFTWARE)
 		idr_remove(&pmu_idr, pmu->type);
 	if (pmu_bus_running) {
 		if (pmu->nr_addr_filters)
@@ -10266,9 +10485,8 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
 
 static struct pmu *perf_init_event(struct perf_event *event)
 {
+	int idx, type, ret;
 	struct pmu *pmu;
-	int idx;
-	int ret;
 
 	idx = srcu_read_lock(&pmus_srcu);
 
@@ -10280,13 +10498,28 @@ static struct pmu *perf_init_event(struct perf_event *event)
 		goto unlock;
 	}
 
+	/*
+	 * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
+	 * are often aliases for PERF_TYPE_RAW.
+	 */
+	type = event->attr.type;
+	if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)
+		type = PERF_TYPE_RAW;
+
+again:
 	rcu_read_lock();
-	pmu = idr_find(&pmu_idr, event->attr.type);
+	pmu = idr_find(&pmu_idr, type);
 	rcu_read_unlock();
 	if (pmu) {
 		ret = perf_try_init_event(pmu, event);
+		if (ret == -ENOENT && event->attr.type != type) {
+			type = event->attr.type;
+			goto again;
+		}
+
 		if (ret)
 			pmu = ERR_PTR(ret);
+
 		goto unlock;
 	}
 
@@ -10618,11 +10851,20 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		}
 	}
 
+	err = security_perf_event_alloc(event);
+	if (err)
+		goto err_callchain_buffer;
+
 	/* symmetric to unaccount_event() in _free_event() */
 	account_event(event);
 
 	return event;
 
+err_callchain_buffer:
+	if (!event->parent) {
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			put_callchain_buffers();
+	}
 err_addr_filters:
 	kfree(event->addr_filter_ranges);
 
@@ -10673,7 +10915,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 
 	attr->size = size;
 
-	if (attr->__reserved_1 || attr->__reserved_2)
+	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
 		return -EINVAL;
 
 	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
@@ -10711,9 +10953,11 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 			attr->branch_sample_type = mask;
 		}
 		/* privileged levels capture (kernel, hv): check permissions */
-		if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
-		    && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+		if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) {
+			ret = perf_allow_kernel(attr);
+			if (ret)
+				return ret;
+		}
 	}
 
 	if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
@@ -10926,13 +11170,19 @@ SYSCALL_DEFINE5(perf_event_open,
 	if (flags & ~PERF_FLAG_ALL)
 		return -EINVAL;
 
+	/* Do we allow access to perf_event_open(2) ? */
+	err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
+	if (err)
+		return err;
+
 	err = perf_copy_attr(attr_uptr, &attr);
 	if (err)
 		return err;
 
 	if (!attr.exclude_kernel) {
-		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+		err = perf_allow_kernel(&attr);
+		if (err)
+			return err;
 	}
 
 	if (attr.namespaces) {
@@ -10949,9 +11199,11 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	/* Only privileged users can get physical addresses */
-	if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
-	    perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
-		return -EACCES;
+	if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) {
+		err = perf_allow_kernel(&attr);
+		if (err)
+			return err;
+	}
 
 	err = security_locked_down(LOCKDOWN_PERF);
 	if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR))
@@ -11213,7 +11465,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	}
 
-	if (event->attr.aux_output && !perf_get_aux_event(event, group_leader))
+	if (perf_need_aux_event(event) && !perf_get_aux_event(event, group_leader))
 		goto err_locked;
 
 	/*