drm/i915: Mark i915_request.timeline as a volatile, rcu pointer

The request->timeline is only valid until the request is retired (i.e.
before it is completed). Upon retiring the request, the context may be
unpinned and freed, and along with it the timeline may be freed. We
therefore need to be very careful when chasing rq->timeline that the
pointer does not disappear beneath us. The vast majority of users are in
a protected context, either during request construction or retirement,
where the timeline->mutex is held and the timeline cannot disappear. It
is those few off the beaten path (where we access a second timeline) that
need extra scrutiny -- to be added in the next patch after first adding
the warnings about dangerous access.

One complication, where we cannot use the timeline->mutex itself, is
during request submission onto hardware (under spinlocks). Here, we want
to check on the timeline to finalize the breadcrumb, and so we need to
impose a second rule to ensure that the request->timeline is indeed
valid. As we are submitting the request, it's context and timeline must
be pinned, as it will be used by the hardware. Since it is pinned, we
know the request->timeline must still be valid, and we cannot submit the
idle barrier until after we release the engine->active.lock, ergo while
submitting and holding that spinlock, a second thread cannot release the
timeline.

v2: Don't be lazy inside selftests; hold the timeline->mutex for as long
as we need it, and tidy up acquiring the timeline with a bit of
refactoring (i915_active_add_request)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190919111912.21631-1-chris@chris-wilson.co.uk
This commit is contained in:
Chris Wilson
2019-09-19 12:19:10 +01:00
parent c45e788d95
commit d19d71fc2b
20 changed files with 147 additions and 63 deletions

View File

@@ -220,7 +220,6 @@ static bool i915_request_retire(struct i915_request *rq)
{
struct i915_active_request *active, *next;
lockdep_assert_held(&rq->timeline->mutex);
if (!i915_request_completed(rq))
return false;
@@ -241,7 +240,8 @@ static bool i915_request_retire(struct i915_request *rq)
* Note this requires that we are always called in request
* completion order.
*/
GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
GEM_BUG_ON(!list_is_first(&rq->link,
&i915_request_timeline(rq)->requests));
rq->ring->head = rq->postfix;
/*
@@ -317,7 +317,7 @@ static bool i915_request_retire(struct i915_request *rq)
void i915_request_retire_upto(struct i915_request *rq)
{
struct intel_timeline * const tl = rq->timeline;
struct intel_timeline * const tl = i915_request_timeline(rq);
struct i915_request *tmp;
GEM_TRACE("%s fence %llx:%lld, current %d\n",
@@ -325,7 +325,6 @@ void i915_request_retire_upto(struct i915_request *rq)
rq->fence.context, rq->fence.seqno,
hwsp_seqno(rq));
lockdep_assert_held(&tl->mutex);
GEM_BUG_ON(!i915_request_completed(rq));
do {
@@ -661,9 +660,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
rq->gem_context = ce->gem_context;
rq->engine = ce->engine;
rq->ring = ce->ring;
rq->timeline = tl;
rcu_assign_pointer(rq->timeline, tl);
rq->hwsp_seqno = tl->hwsp_seqno;
rq->hwsp_cacheline = tl->hwsp_cacheline;
rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
spin_lock_init(&rq->lock);
@@ -771,7 +772,8 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
return 0;
signal = list_prev_entry(signal, link);
if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
if (intel_timeline_sync_is_later(i915_request_timeline(rq),
&signal->fence))
return 0;
return i915_sw_fence_await_dma_fence(&rq->submit,
@@ -947,7 +949,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
/* Squash repeated waits to the same timelines */
if (fence->context &&
intel_timeline_sync_is_later(rq->timeline, fence))
intel_timeline_sync_is_later(i915_request_timeline(rq),
fence))
continue;
if (dma_fence_is_i915(fence))
@@ -961,7 +964,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
/* Record the latest fence used against each timeline */
if (fence->context)
intel_timeline_sync_set(rq->timeline, fence);
intel_timeline_sync_set(i915_request_timeline(rq),
fence);
} while (--nchild);
return 0;
@@ -1103,7 +1107,7 @@ void i915_request_skip(struct i915_request *rq, int error)
static struct i915_request *
__i915_request_add_to_timeline(struct i915_request *rq)
{
struct intel_timeline *timeline = rq->timeline;
struct intel_timeline *timeline = i915_request_timeline(rq);
struct i915_request *prev;
/*
@@ -1216,7 +1220,7 @@ void __i915_request_queue(struct i915_request *rq,
void i915_request_add(struct i915_request *rq)
{
struct i915_sched_attr attr = rq->gem_context->sched;
struct intel_timeline * const tl = rq->timeline;
struct intel_timeline * const tl = i915_request_timeline(rq);
struct i915_request *prev;
lockdep_assert_held(&tl->mutex);
@@ -1271,7 +1275,9 @@ void i915_request_add(struct i915_request *rq)
* work on behalf of others -- but instead we should benefit from
* improved resource management. (Well, that's the theory at least.)
*/
if (prev && i915_request_completed(prev) && prev->timeline == tl)
if (prev &&
i915_request_completed(prev) &&
rcu_access_pointer(prev->timeline) == tl)
i915_request_retire_upto(prev);
mutex_unlock(&tl->mutex);