Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "On the kernel side there's a bunch of ring-buffer ordering fixes for
  a reproducible bug, plus a PEBS constraints regression fix.

  Plus tooling fixes."

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  tools headers UAPI: Sync kvm.h headers with the kernel sources
  perf record: Fix s390 missing module symbol and warning for non-root users
  perf machine: Read also the end of the kernel
  perf test vmlinux-kallsyms: Ignore aliases to _etext when searching on kallsyms
  perf session: Add missing swap ops for namespace events
  perf namespace: Protect reading thread's namespace
  tools headers UAPI: Sync drm/drm.h with the kernel
  tools headers UAPI: Sync drm/i915_drm.h with the kernel
  tools headers UAPI: Sync linux/fs.h with the kernel
  tools headers UAPI: Sync linux/sched.h with the kernel
  tools arch x86: Sync asm/cpufeatures.h with the kernel
  tools include UAPI: Update copy of files related to new fspick, fsmount, fsconfig, fsopen, move_mount and open_tree syscalls
  perf arm64: Fix mksyscalltbl when system kernel headers are ahead of the kernel
  perf data: Fix 'strncat may truncate' build failure with recent gcc
  perf/ring-buffer: Use regular variables for nesting
  perf/ring-buffer: Always use {READ,WRITE}_ONCE() for rb->user_page data
  perf/ring_buffer: Add ordering to rb->nest increment
  perf/ring_buffer: Fix exposing a temporarily decreased data_head
  perf/x86/intel/ds: Fix EVENT vs. UEVENT PEBS constraints
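All four perf/ring_buffer fixes harden the same single-producer protocol: the kernel publishes data_head in the mmap'd control page and userspace publishes data_tail back. For orientation, a minimal consumer-side sketch of that pairing (not part of this merge; the fences mirror the A/B/C/D scheme quoted in the kernel comments below, and record parsing plus wrap handling are elided):

#include <linux/perf_event.h>
#include <stdint.h>

/*
 * Sketch only: drain records from a perf mmap ring. 'pc' is the
 * mapped control page, 'data' the data area of power-of-two 'size'.
 */
static void drain(volatile struct perf_event_mmap_page *pc,
		  const char *data, uint64_t size)
{
	uint64_t head = pc->data_head;			/* LOAD ->data_head   (C) */
	uint64_t tail = pc->data_tail;

	__atomic_thread_fence(__ATOMIC_ACQUIRE);	/* smp_rmb(), pairs with B */

	while (tail != head) {				/* LOAD $data */
		const struct perf_event_header *hdr =
			(const void *)(data + (tail & (size - 1)));
		/* ... process one record ... */
		tail += hdr->size;
	}

	__atomic_thread_fence(__ATOMIC_SEQ_CST);	/* smp_mb()  (D), pairs with A */
	pc->data_tail = tail;				/* STORE ->data_tail */
}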
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -24,7 +24,7 @@ struct ring_buffer {
 	atomic_t			poll;		/* POLL_ for wakeups */
 
 	local_t				head;		/* write position    */
-	local_t				nest;		/* nested writers    */
+	unsigned int			nest;		/* nested writers    */
 	local_t				events;		/* event limit       */
 	local_t				wakeup;		/* wakeup stamp      */
 	local_t				lost;		/* nr records lost   */
@@ -41,7 +41,7 @@ struct ring_buffer {
 
 	/* AUX area */
 	long				aux_head;
-	local_t				aux_nest;
+	unsigned int			aux_nest;
 	long				aux_wakeup;	/* last aux_watermark boundary crossed by aux_head */
 	unsigned long			aux_pgoff;
 	int				aux_nr_pages;
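For reference, local_t wraps an atomic long for CPU-local atomic read-modify-write operations:

/* Paraphrased from include/asm-generic/local.h: */
typedef struct { atomic_long_t a; } local_t;

A strictly nesting counter that is only ever touched by one CPU never needs that atomicity, hence the switch to a plain unsigned int above.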
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -38,7 +38,12 @@ static void perf_output_get_handle(struct perf_output_handle *handle)
 	struct ring_buffer *rb = handle->rb;
 
 	preempt_disable();
-	local_inc(&rb->nest);
+
+	/*
+	 * Avoid an explicit LOAD/STORE such that architectures with memops
+	 * can use them.
+	 */
+	(*(volatile unsigned int *)&rb->nest)++;
 	handle->wakeup = local_read(&rb->wakeup);
 }
 
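The open-coded volatile increment replaces local_inc() because rb->nest is only ever modified by the local CPU (preemption is disabled), and IRQ/NMI writers strictly nest: their increments and decrements balance before an interrupted read-modify-write resumes, while architectures with memory operands (e.g. x86) can still emit a single RMW instruction. A toy user-space illustration of why the torn RMW is harmless (hypothetical names, not kernel API):

#include <stdio.h>

static volatile unsigned int nest;	/* stands in for rb->nest */

static void irq_writer(void)		/* an IRQ/NMI hitting the same CPU */
{
	(*(volatile unsigned int *)&nest)++;	/* perf_output_get_handle() */
	/* ... emit a record ... */
	(*(volatile unsigned int *)&nest)--;	/* perf_output_put_handle() */
}

int main(void)
{
	/* The outer writer's RMW, torn apart as a load/store machine would: */
	unsigned int tmp = nest;	/* load                                */
	irq_writer();			/* "interrupt" fires mid-RMW, nets out */
	nest = tmp + 1;			/* store still writes the right value  */
	printf("nest = %u (expect 1)\n", nest);
	return 0;
}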
@@ -46,17 +51,35 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
 {
 	struct ring_buffer *rb = handle->rb;
 	unsigned long head;
+	unsigned int nest;
+
+	/*
+	 * If this isn't the outermost nesting, we don't have to update
+	 * @rb->user_page->data_head.
+	 */
+	nest = READ_ONCE(rb->nest);
+	if (nest > 1) {
+		WRITE_ONCE(rb->nest, nest - 1);
+		goto out;
+	}
 
 again:
+	/*
+	 * In order to avoid publishing a head value that goes backwards,
+	 * we must ensure the load of @rb->head happens after we've
+	 * incremented @rb->nest.
+	 *
+	 * Otherwise we can observe a @rb->head value before one published
+	 * by an IRQ/NMI happening between the load and the increment.
+	 */
+	barrier();
 	head = local_read(&rb->head);
 
 	/*
-	 * IRQ/NMI can happen here, which means we can miss a head update.
+	 * IRQ/NMI can happen here and advance @rb->head, causing our
+	 * load above to be stale.
 	 */
 
-	if (!local_dec_and_test(&rb->nest))
-		goto out;
-
 	/*
 	 * Since the mmap() consumer (userspace) can run on a different CPU:
 	 *
@@ -84,14 +107,23 @@ again:
 	 * See perf_output_begin().
 	 */
 	smp_wmb(); /* B, matches C */
-	rb->user_page->data_head = head;
+	WRITE_ONCE(rb->user_page->data_head, head);
 
 	/*
-	 * Now check if we missed an update -- rely on previous implied
-	 * compiler barriers to force a re-read.
+	 * We must publish the head before decrementing the nest count,
+	 * otherwise an IRQ/NMI can publish a more recent head value and our
+	 * write will (temporarily) publish a stale value.
+	 */
+	barrier();
+	WRITE_ONCE(rb->nest, 0);
+
+	/*
+	 * Ensure we decrement @rb->nest before we validate the @rb->head.
+	 * Otherwise we cannot be sure we caught the 'last' nested update.
 	 */
 	barrier();
 	if (unlikely(head != local_read(&rb->head))) {
-		local_inc(&rb->nest);
+		WRITE_ONCE(rb->nest, 1);
 		goto again;
 	}
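Pieced together, the outermost writer now loads the head only after the nest increment, publishes it before dropping the nest count, and re-validates afterwards. The resulting function looks roughly like this (a paraphrase; lines outside the hunks above are assumed from the surrounding kernel source):

static void perf_output_put_handle(struct perf_output_handle *handle)
{
	struct ring_buffer *rb = handle->rb;
	unsigned long head;
	unsigned int nest;

	nest = READ_ONCE(rb->nest);
	if (nest > 1) {				/* not outermost: just unnest */
		WRITE_ONCE(rb->nest, nest - 1);
		goto out;
	}

again:
	barrier();				/* load head after nest++ */
	head = local_read(&rb->head);

	smp_wmb();				/* B, matches C */
	WRITE_ONCE(rb->user_page->data_head, head);

	barrier();				/* publish head before dropping nest */
	WRITE_ONCE(rb->nest, 0);

	barrier();				/* drop nest before re-checking head */
	if (unlikely(head != local_read(&rb->head))) {
		WRITE_ONCE(rb->nest, 1);	/* we are outermost again */
		goto again;
	}

	if (handle->wakeup != local_read(&rb->wakeup))
		perf_output_wakeup(handle);

out:
	preempt_enable();
}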
@@ -330,6 +362,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	struct perf_event *output_event = event;
 	unsigned long aux_head, aux_tail;
 	struct ring_buffer *rb;
+	unsigned int nest;
 
 	if (output_event->parent)
 		output_event = output_event->parent;
@@ -360,13 +393,16 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	if (!refcount_inc_not_zero(&rb->aux_refcount))
 		goto err;
 
+	nest = READ_ONCE(rb->aux_nest);
 	/*
 	 * Nesting is not supported for AUX area, make sure nested
 	 * writers are caught early
 	 */
-	if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1)))
+	if (WARN_ON_ONCE(nest))
 		goto err_put;
 
+	WRITE_ONCE(rb->aux_nest, nest + 1);
+
 	aux_head = rb->aux_head;
 
 	handle->rb = rb;
@@ -394,7 +430,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 		if (!handle->size) { /* A, matches D */
 			event->pending_disable = smp_processor_id();
 			perf_output_wakeup(handle);
-			local_set(&rb->aux_nest, 0);
+			WRITE_ONCE(rb->aux_nest, 0);
 			goto err_put;
 		}
 	}
@@ -471,7 +507,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 		perf_event_aux_event(handle->event, aux_head, size,
 				     handle->aux_flags);
 
-	rb->user_page->aux_head = rb->aux_head;
+	WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
 	if (rb_need_aux_wakeup(rb))
 		wakeup = true;
 
@@ -483,7 +519,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 
 	handle->event = NULL;
 
-	local_set(&rb->aux_nest, 0);
+	WRITE_ONCE(rb->aux_nest, 0);
 	/* can't be last */
 	rb_free_aux(rb);
 	ring_buffer_put(rb);
@@ -503,7 +539,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
 
 	rb->aux_head += size;
 
-	rb->user_page->aux_head = rb->aux_head;
+	WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
 	if (rb_need_aux_wakeup(rb)) {
 		perf_output_wakeup(handle);
 		handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
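The aux_head/data_head stores become WRITE_ONCE() because user_page is read concurrently by userspace: without it the compiler may tear or re-issue the store, and a consumer on another CPU could observe a half-written 64-bit head. The consumer-side mirror for the AUX area follows the same single-producer/single-consumer discipline (a sketch, using the uapi field names):

#include <linux/perf_event.h>
#include <stdint.h>

/* Sketch: how much AUX data the kernel's WRITE_ONCE() publishes
 * above have made visible to a user-space reader.
 */
static uint64_t aux_avail(volatile struct perf_event_mmap_page *pc)
{
	uint64_t head = pc->aux_head;			/* pairs with WRITE_ONCE above  */
	__atomic_thread_fence(__ATOMIC_ACQUIRE);	/* read AUX data only after head */
	return head - pc->aux_tail;
}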