drm/i915: Reduce i915_request.lock contention for i915_request_wait
Currently, we use i915_request_completed() directly in i915_request_wait() and follow up with a manual invocation of dma_fence_signal(). This appears to cause a large number of contentions on i915_request.lock as when the process is woken up after the fence is signaled by an interrupt, we will then try and call dma_fence_signal() ourselves while the signaler is still holding the lock. dma_fence_is_signaled() has the benefit of checking the DMA_FENCE_FLAG_SIGNALED_BIT prior to calling dma_fence_signal() and so avoids most of that contention. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Matthew Auld <matthew.auld@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200716100754.5670-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
This commit is contained in:

committed by
Joonas Lahtinen

parent
3393649977
commit
3f6a6f343c
@@ -1640,7 +1640,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
|
||||
return this_cpu != cpu;
|
||||
}
|
||||
|
||||
static bool __i915_spin_request(const struct i915_request * const rq, int state)
|
||||
static bool __i915_spin_request(struct i915_request * const rq, int state)
|
||||
{
|
||||
unsigned long timeout_ns;
|
||||
unsigned int cpu;
|
||||
@@ -1673,7 +1673,7 @@ static bool __i915_spin_request(const struct i915_request * const rq, int state)
|
||||
timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
|
||||
timeout_ns += local_clock_ns(&cpu);
|
||||
do {
|
||||
if (i915_request_completed(rq))
|
||||
if (dma_fence_is_signaled(&rq->fence))
|
||||
return true;
|
||||
|
||||
if (signal_pending_state(state, current))
|
||||
@@ -1697,7 +1697,7 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
|
||||
{
|
||||
struct request_wait *wait = container_of(cb, typeof(*wait), cb);
|
||||
|
||||
wake_up_process(wait->tsk);
|
||||
wake_up_process(fetch_and_zero(&wait->tsk));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1766,10 +1766,8 @@ long i915_request_wait(struct i915_request *rq,
|
||||
* duration, which we currently lack.
|
||||
*/
|
||||
if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
|
||||
__i915_spin_request(rq, state)) {
|
||||
dma_fence_signal(&rq->fence);
|
||||
__i915_spin_request(rq, state))
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* This client is about to stall waiting for the GPU. In many cases
|
||||
@@ -1793,10 +1791,8 @@ long i915_request_wait(struct i915_request *rq,
|
||||
for (;;) {
|
||||
set_current_state(state);
|
||||
|
||||
if (i915_request_completed(rq)) {
|
||||
dma_fence_signal(&rq->fence);
|
||||
if (dma_fence_is_signaled(&rq->fence))
|
||||
break;
|
||||
}
|
||||
|
||||
intel_engine_flush_submission(rq->engine);
|
||||
|
||||
@@ -1814,7 +1810,9 @@ long i915_request_wait(struct i915_request *rq,
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
dma_fence_remove_callback(&rq->fence, &wait.cb);
|
||||
if (READ_ONCE(wait.tsk))
|
||||
dma_fence_remove_callback(&rq->fence, &wait.cb);
|
||||
GEM_BUG_ON(!list_empty(&wait.cb.node));
|
||||
|
||||
out:
|
||||
mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
|
||||
|
Reference in New Issue
Block a user