drm/i915: Only insert the mb() before updating the fence parameter
With a fence, we only need to insert a memory barrier around the actual fence alteration for CPU accesses through the GTT. Performing the barrier in flush-fence was inserting unnecessary and expensive barriers for never fenced objects. Note removing the barriers from flush-fence, which was effectively a barrier before every direct access through the GTT, revealed that we were missing a barrier before the first access through the GTT. Lack of that barrier was sufficient to cause GPU hangs. v2: Add a couple more comments to explain the new barriers Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:

committed by
Daniel Vetter

parent
1f83fee08d
commit
d0a57789d5
@@ -2611,9 +2611,22 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
|
|||||||
POSTING_READ(FENCE_REG_830_0 + reg * 4);
|
POSTING_READ(FENCE_REG_830_0 + reg * 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
|
||||||
|
{
|
||||||
|
return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
|
||||||
|
}
|
||||||
|
|
||||||
static void i915_gem_write_fence(struct drm_device *dev, int reg,
|
static void i915_gem_write_fence(struct drm_device *dev, int reg,
|
||||||
struct drm_i915_gem_object *obj)
|
struct drm_i915_gem_object *obj)
|
||||||
{
|
{
|
||||||
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
||||||
|
|
||||||
|
/* Ensure that all CPU reads are completed before installing a fence
|
||||||
|
* and all writes before removing the fence.
|
||||||
|
*/
|
||||||
|
if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
|
||||||
|
mb();
|
||||||
|
|
||||||
switch (INTEL_INFO(dev)->gen) {
|
switch (INTEL_INFO(dev)->gen) {
|
||||||
case 7:
|
case 7:
|
||||||
case 6:
|
case 6:
|
||||||
@@ -2623,6 +2636,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
|
|||||||
case 2: i830_write_fence_reg(dev, reg, obj); break;
|
case 2: i830_write_fence_reg(dev, reg, obj); break;
|
||||||
default: BUG();
|
default: BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* And similarly be paranoid that no direct access to this region
|
||||||
|
* is reordered to before the fence is installed.
|
||||||
|
*/
|
||||||
|
if (i915_gem_object_needs_mb(obj))
|
||||||
|
mb();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int fence_number(struct drm_i915_private *dev_priv,
|
static inline int fence_number(struct drm_i915_private *dev_priv,
|
||||||
@@ -2652,7 +2671,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
|
i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
|
||||||
{
|
{
|
||||||
if (obj->last_fenced_seqno) {
|
if (obj->last_fenced_seqno) {
|
||||||
int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
|
int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
|
||||||
@@ -2662,12 +2681,6 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
|
|||||||
obj->last_fenced_seqno = 0;
|
obj->last_fenced_seqno = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Ensure that all CPU reads are completed before installing a fence
|
|
||||||
* and all writes before removing the fence.
|
|
||||||
*/
|
|
||||||
if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
|
|
||||||
mb();
|
|
||||||
|
|
||||||
obj->fenced_gpu_access = false;
|
obj->fenced_gpu_access = false;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -2678,7 +2691,7 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
|
|||||||
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
|
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
ret = i915_gem_object_flush_fence(obj);
|
ret = i915_gem_object_wait_fence(obj);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
@@ -2752,7 +2765,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
|
|||||||
* will need to serialise the write to the associated fence register?
|
* will need to serialise the write to the associated fence register?
|
||||||
*/
|
*/
|
||||||
if (obj->fence_dirty) {
|
if (obj->fence_dirty) {
|
||||||
ret = i915_gem_object_flush_fence(obj);
|
ret = i915_gem_object_wait_fence(obj);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -2773,7 +2786,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
|
|||||||
if (reg->obj) {
|
if (reg->obj) {
|
||||||
struct drm_i915_gem_object *old = reg->obj;
|
struct drm_i915_gem_object *old = reg->obj;
|
||||||
|
|
||||||
ret = i915_gem_object_flush_fence(old);
|
ret = i915_gem_object_wait_fence(old);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
@@ -3068,6 +3081,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
|
|||||||
|
|
||||||
i915_gem_object_flush_cpu_write_domain(obj);
|
i915_gem_object_flush_cpu_write_domain(obj);
|
||||||
|
|
||||||
|
/* Serialise direct access to this object with the barriers for
|
||||||
|
* coherent writes from the GPU, by effectively invalidating the
|
||||||
|
* GTT domain upon first access.
|
||||||
|
*/
|
||||||
|
if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
|
||||||
|
mb();
|
||||||
|
|
||||||
old_write_domain = obj->base.write_domain;
|
old_write_domain = obj->base.write_domain;
|
||||||
old_read_domains = obj->base.read_domains;
|
old_read_domains = obj->base.read_domains;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user