drm/i915: Apply rps waitboosting for dma_fence_wait_timeout()

As time goes by, usage of generic ioctls such as drm_syncobj and
sync_file is on the increase, bypassing i915-specific ioctls like
GEM_WAIT. Currently, we only apply waitboosting to our driver ioctls as
we track the file/client and account the waitboosting to them. However,
since commit 7b92c1bd05 ("drm/i915: Avoid keeping waitboost active for
signaling threads"), we have no longer been applying the client
ratelimiting on waitboosts, and so that information has only been used
for debug tracking.

Push the application of waitboosting down to the common
i915_request_wait, and apply it to all foreign fence waits as well.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190213092504.25709-1-chris@chris-wilson.co.uk
This commit is contained in:
Chris Wilson
2019-02-13 09:25:04 +00:00
parent e6ed078d6d
commit 62eb3c24b3
7 changed files with 44 additions and 98 deletions

View File

@@ -416,8 +416,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
static long
i915_gem_object_wait_fence(struct dma_fence *fence,
unsigned int flags,
long timeout,
struct intel_rps_client *rps_client)
long timeout)
{
struct i915_request *rq;
@@ -435,27 +434,6 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
if (i915_request_completed(rq))
goto out;
/*
* This client is about to stall waiting for the GPU. In many cases
* this is undesirable and limits the throughput of the system, as
* many clients cannot continue processing user input/output whilst
* blocked. RPS autotuning may take tens of milliseconds to respond
* to the GPU load and thus incurs additional latency for the client.
* We can circumvent that by promoting the GPU frequency to maximum
* before we wait. This makes the GPU throttle up much more quickly
* (good for benchmarks and user experience, e.g. window animations),
* but at a cost of spending more power processing the workload
* (bad for battery). Not all clients even want their results
* immediately and for them we should just let the GPU select its own
* frequency to maximise efficiency. To prevent a single client from
* forcing the clocks too high for the whole system, we only allow
* each client to waitboost once in a busy period.
*/
if (rps_client && !i915_request_started(rq)) {
if (INTEL_GEN(rq->i915) >= 6)
gen6_rps_boost(rq, rps_client);
}
timeout = i915_request_wait(rq, flags, timeout);
out:
@@ -468,8 +446,7 @@ out:
static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
unsigned int flags,
long timeout,
struct intel_rps_client *rps_client)
long timeout)
{
unsigned int seq = __read_seqcount_begin(&resv->seq);
struct dma_fence *excl;
@@ -487,8 +464,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
for (i = 0; i < count; i++) {
timeout = i915_gem_object_wait_fence(shared[i],
flags, timeout,
rps_client);
flags, timeout);
if (timeout < 0)
break;
@@ -514,8 +490,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
}
if (excl && timeout >= 0)
timeout = i915_gem_object_wait_fence(excl, flags, timeout,
rps_client);
timeout = i915_gem_object_wait_fence(excl, flags, timeout);
dma_fence_put(excl);
@@ -609,30 +584,19 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
* @obj: i915 gem object
* @flags: how to wait (under a lock, for all rendering or just for writes etc)
* @timeout: how long to wait
* @rps_client: client (user process) to charge for any waitboosting
*/
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
unsigned int flags,
long timeout,
struct intel_rps_client *rps_client)
long timeout)
{
might_sleep();
GEM_BUG_ON(timeout < 0);
timeout = i915_gem_object_wait_reservation(obj->resv,
flags, timeout,
rps_client);
timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
return timeout < 0 ? timeout : 0;
}
static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
struct drm_i915_file_private *fpriv = file->driver_priv;
return &fpriv->rps_client;
}
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
@@ -838,8 +802,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT,
NULL);
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
@@ -891,8 +854,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT,
NULL);
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
@@ -1154,8 +1116,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT,
to_rps_client(file));
MAX_SCHEDULE_TIMEOUT);
if (ret)
goto out;
@@ -1454,8 +1415,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT,
to_rps_client(file));
MAX_SCHEDULE_TIMEOUT);
if (ret)
goto err;
@@ -1553,8 +1513,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
(write_domain ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT,
to_rps_client(file));
MAX_SCHEDULE_TIMEOUT);
if (err)
goto out;
@@ -1863,8 +1822,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
*/
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT,
NULL);
MAX_SCHEDULE_TIMEOUT);
if (ret)
goto err;
@@ -3195,8 +3153,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
I915_WAIT_ALL,
to_wait_timeout(args->timeout_ns),
to_rps_client(file));
to_wait_timeout(args->timeout_ns));
if (args->timeout_ns > 0) {
args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
@@ -3265,7 +3222,7 @@ wait_for_timelines(struct drm_i915_private *i915,
* stalls, so allow the gpu to boost to maximum clocks.
*/
if (flags & I915_WAIT_FOR_IDLE_BOOST)
gen6_rps_boost(rq, NULL);
gen6_rps_boost(rq);
timeout = i915_request_wait(rq, flags, timeout);
i915_request_put(rq);
@@ -3360,8 +3317,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
(write ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT,
NULL);
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
@@ -3423,8 +3379,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
(write ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT,
NULL);
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
@@ -3539,8 +3494,7 @@ restart:
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT,
NULL);
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
@@ -3678,8 +3632,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT,
to_rps_client(file));
MAX_SCHEDULE_TIMEOUT);
if (ret)
goto out;
@@ -3805,8 +3758,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
(write ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT,
NULL);
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;