drm/i915: Protect request retirement with timeline->mutex

Forgo the struct_mutex requirement for request retirement as we have
been transitioning over to only using the timeline->mutex for
controlling the lifetime of a request on that timeline.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190815205709.24285-4-chris@chris-wilson.co.uk
Author: Chris Wilson
Date:   2019-08-15 21:57:09 +01:00
Parent: ccb23d2dcc
Commit: e5dadff4b0

12 changed files with 209 additions and 189 deletions
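The locking pattern this change moves to can be sketched roughly as follows. This is an illustrative, simplified sketch only: the struct and function names below (example_timeline, example_request, example_retire_requests, example_retire) are invented for the example and are not the i915 types in the diff. The idea is that each timeline owns its request list and its own mutex, so retiring completed requests needs only that per-timeline lock rather than a device-global struct_mutex.

#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/slab.h>

/* Simplified stand-ins for the per-timeline state (illustrative only). */
struct example_timeline {
	struct mutex mutex;		/* protects ->requests */
	struct list_head requests;	/* oldest request first */
};

struct example_request {
	struct list_head link;		/* entry in example_timeline->requests */
	bool completed;
};

/* Retire completed requests, oldest first; requests complete in order. */
static void example_retire_requests(struct example_timeline *tl)
{
	struct example_request *rq, *rn;

	lockdep_assert_held(&tl->mutex);

	list_for_each_entry_safe(rq, rn, &tl->requests, link) {
		if (!rq->completed)
			break;

		list_del(&rq->link);
		kfree(rq);
	}
}

/* Callers take only the timeline's own lock, not a global device mutex. */
static void example_retire(struct example_timeline *tl)
{
	mutex_lock(&tl->mutex);
	example_retire_requests(tl);
	mutex_unlock(&tl->mutex);
}

The diff below applies the same idea inside the driver: i915_request_retire() asserts rq->timeline->mutex instead of struct_mutex, and i915_retire_requests() walks the per-timeline request lists under each timeline's own lock.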


@@ -181,40 +181,6 @@ i915_request_remove_from_client(struct i915_request *request)
	spin_unlock(&file_priv->mm.lock);
}
-static void advance_ring(struct i915_request *request)
-{
-	struct intel_ring *ring = request->ring;
-	unsigned int tail;
-	/*
-	 * We know the GPU must have read the request to have
-	 * sent us the seqno + interrupt, so use the position
-	 * of tail of the request to update the last known position
-	 * of the GPU head.
-	 *
-	 * Note this requires that we are always called in request
-	 * completion order.
-	 */
-	GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
-	if (list_is_last(&request->ring_link, &ring->request_list)) {
-		/*
-		 * We may race here with execlists resubmitting this request
-		 * as we retire it. The resubmission will move the ring->tail
-		 * forwards (to request->wa_tail). We either read the
-		 * current value that was written to hw, or the value that
-		 * is just about to be. Either works, if we miss the last two
-		 * noops - they are safe to be replayed on a reset.
-		 */
-		tail = READ_ONCE(request->tail);
-		list_del(&ring->active_link);
-	} else {
-		tail = request->postfix;
-	}
-	list_del_init(&request->ring_link);
-	ring->head = tail;
-}
static void free_capture_list(struct i915_request *request)
{
	struct i915_capture_list *capture;
@@ -232,7 +198,7 @@ static bool i915_request_retire(struct i915_request *rq)
{
	struct i915_active_request *active, *next;
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&rq->timeline->mutex);
	if (!i915_request_completed(rq))
		return false;
@@ -244,7 +210,17 @@ static bool i915_request_retire(struct i915_request *rq)
	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
	trace_i915_request_retire(rq);
-	advance_ring(rq);
+	/*
+	 * We know the GPU must have read the request to have
+	 * sent us the seqno + interrupt, so use the position
+	 * of tail of the request to update the last known position
+	 * of the GPU head.
+	 *
+	 * Note this requires that we are always called in request
+	 * completion order.
+	 */
+	GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
+	rq->ring->head = rq->postfix;
	/*
	 * Walk through the active list, calling retire on each. This allows
@@ -321,7 +297,7 @@ static bool i915_request_retire(struct i915_request *rq)
void i915_request_retire_upto(struct i915_request *rq)
{
-	struct intel_ring *ring = rq->ring;
+	struct intel_timeline * const tl = rq->timeline;
	struct i915_request *tmp;
	GEM_TRACE("%s fence %llx:%lld, current %d\n",
@@ -329,15 +305,11 @@ void i915_request_retire_upto(struct i915_request *rq)
		  rq->fence.context, rq->fence.seqno,
		  hwsp_seqno(rq));
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&tl->mutex);
	GEM_BUG_ON(!i915_request_completed(rq));
-	if (list_empty(&rq->ring_link))
-		return;
	do {
-		tmp = list_first_entry(&ring->request_list,
-				       typeof(*tmp), ring_link);
+		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
	} while (i915_request_retire(tmp) && tmp != rq);
}
@@ -564,29 +536,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
	return NOTIFY_DONE;
}
-static void ring_retire_requests(struct intel_ring *ring)
+static void retire_requests(struct intel_timeline *tl)
{
	struct i915_request *rq, *rn;
-	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+	list_for_each_entry_safe(rq, rn, &tl->requests, link)
		if (!i915_request_retire(rq))
			break;
}
static noinline struct i915_request *
-request_alloc_slow(struct intel_context *ce, gfp_t gfp)
+request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
{
-	struct intel_ring *ring = ce->ring;
	struct i915_request *rq;
-	if (list_empty(&ring->request_list))
+	if (list_empty(&tl->requests))
		goto out;
	if (!gfpflags_allow_blocking(gfp))
		goto out;
	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
	i915_request_retire(rq);
	rq = kmem_cache_alloc(global.slab_requests,
@@ -595,11 +566,11 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
		return rq;
	/* Ratelimit ourselves to prevent oom from malicious clients */
-	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_last_entry(&tl->requests, typeof(*rq), link);
	cond_synchronize_rcu(rq->rcustate);
	/* Retire our old requests in the hope that we free some */
-	ring_retire_requests(ring);
+	retire_requests(tl);
out:
	return kmem_cache_alloc(global.slab_requests, gfp);
@@ -650,7 +621,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
	rq = kmem_cache_alloc(global.slab_requests,
			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (unlikely(!rq)) {
-		rq = request_alloc_slow(ce, gfp);
+		rq = request_alloc_slow(tl, gfp);
		if (!rq) {
			ret = -ENOMEM;
			goto err_unreserve;
@@ -742,15 +713,15 @@ struct i915_request *
i915_request_create(struct intel_context *ce)
{
	struct i915_request *rq;
-	int err;
+	struct intel_timeline *tl;
-	err = intel_context_timeline_lock(ce);
-	if (err)
-		return ERR_PTR(err);
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl))
+		return ERR_CAST(tl);
	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
+	if (!list_is_last(&rq->link, &tl->requests))
		i915_request_retire(rq);
	intel_context_enter(ce);
@@ -760,22 +731,22 @@ i915_request_create(struct intel_context *ce)
		goto err_unlock;
	/* Check that we do not interrupt ourselves with a new request */
-	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
+	rq->cookie = lockdep_pin_lock(&tl->mutex);
	return rq;
err_unlock:
-	intel_context_timeline_unlock(ce);
+	intel_context_timeline_unlock(tl);
	return rq;
}
static int
i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
{
-	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+	if (list_is_first(&signal->link, &signal->timeline->requests))
		return 0;
-	signal = list_prev_entry(signal, ring_link);
+	signal = list_prev_entry(signal, link);
	if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
		return 0;
@@ -1155,7 +1126,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_ring *ring = rq->ring;
-	struct i915_request *prev;
	u32 *cs;
	GEM_TRACE("%s fence %llx:%lld\n",
@@ -1168,6 +1138,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
	 */
	GEM_BUG_ON(rq->reserved_space > ring->space);
	rq->reserved_space = 0;
+	rq->emitted_jiffies = jiffies;
	/*
	 * Record the position of the start of the breadcrumb so that
@@ -1179,14 +1150,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
	GEM_BUG_ON(IS_ERR(cs));
	rq->postfix = intel_ring_offset(rq, cs);
-	prev = __i915_request_add_to_timeline(rq);
-	list_add_tail(&rq->ring_link, &ring->request_list);
-	if (list_is_first(&rq->ring_link, &ring->request_list))
-		list_add(&ring->active_link, &rq->i915->gt.active_rings);
-	rq->emitted_jiffies = jiffies;
-	return prev;
+	return __i915_request_add_to_timeline(rq);
}
void __i915_request_queue(struct i915_request *rq,
@@ -1212,10 +1176,11 @@ void __i915_request_queue(struct i915_request *rq,
void i915_request_add(struct i915_request *rq)
{
	struct i915_sched_attr attr = rq->gem_context->sched;
+	struct intel_timeline * const tl = rq->timeline;
	struct i915_request *prev;
-	lockdep_assert_held(&rq->timeline->mutex);
-	lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+	lockdep_assert_held(&tl->mutex);
+	lockdep_unpin_lock(&tl->mutex, rq->cookie);
	trace_i915_request_add(rq);
@@ -1266,10 +1231,10 @@ void i915_request_add(struct i915_request *rq)
	 * work on behalf of others -- but instead we should benefit from
	 * improved resource management. (Well, that's the theory at least.)
	 */
-	if (prev && i915_request_completed(prev))
+	if (prev && i915_request_completed(prev) && prev->timeline == tl)
		i915_request_retire_upto(prev);
-	mutex_unlock(&rq->timeline->mutex);
+	mutex_unlock(&tl->mutex);
}
static unsigned long local_clock_us(unsigned int *cpu)
@@ -1489,18 +1454,43 @@ out:
bool i915_retire_requests(struct drm_i915_private *i915)
{
-	struct intel_ring *ring, *tmp;
+	struct intel_gt_timelines *timelines = &i915->gt.timelines;
+	struct intel_timeline *tl, *tn;
+	LIST_HEAD(free);
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	spin_lock(&timelines->lock);
+	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+		if (!mutex_trylock(&tl->mutex))
+			continue;
-	list_for_each_entry_safe(ring, tmp,
-				 &i915->gt.active_rings, active_link) {
-		intel_ring_get(ring); /* last rq holds reference! */
-		ring_retire_requests(ring);
-		intel_ring_put(ring);
+		intel_timeline_get(tl);
+		GEM_BUG_ON(!tl->active_count);
+		tl->active_count++; /* pin the list element */
+		spin_unlock(&timelines->lock);
+		retire_requests(tl);
+		spin_lock(&timelines->lock);
+		/* Resume iteration after dropping lock */
+		list_safe_reset_next(tl, tn, link);
+		if (!--tl->active_count)
+			list_del(&tl->link);
+		mutex_unlock(&tl->mutex);
+		/* Defer the final release to after the spinlock */
+		if (refcount_dec_and_test(&tl->kref.refcount)) {
+			GEM_BUG_ON(tl->active_count);
+			list_add(&tl->link, &free);
+		}
	}
+	spin_unlock(&timelines->lock);
-	return !list_empty(&i915->gt.active_rings);
+	list_for_each_entry_safe(tl, tn, &free, link)
+		__intel_timeline_free(&tl->kref);
+	return !list_empty(&timelines->active_list);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)