drm/i915: Write RING_TAIL once per-request
Ignoring the legacy DRI1 code, and a couple of special cases (to be discussed later), all access to the ring is mediated through requests. The first write to a ring will grab a seqno and mark the ring as having an outstanding_lazy_request. Either through explicitly adding a request after an execbuffer or through an implicit wait (either by the CPU or by a semaphore), that sequence of writes will be terminated with a request. So we can ellide all the intervening writes to the tail register and send the entire command stream to the GPU at once. This will reduce the number of *serialising* writes to the tail register by a factor or 3-5 times (depending upon architecture and number of workarounds, context switches, etc involved). This becomes even more noticeable when the register write is overloaded with a number of debugging tools. The astute reader will wonder if it is then possible to overflow the ring with a single command. It is not. When we start a command sequence to the ring, we check for available space and issue a wait in case we have not. The ring wait will in this case be forced to flush the outstanding register write and then poll the ACTHD for sufficient space to continue. The exception to the rule where everything is inside a request are a few initialisation cases where we may want to write GPU commands via the CS before userspace wakes up and page flips. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:

committed by
Daniel Vetter

parent
ad0d6dc485
commit
092467327c
@@ -41,6 +41,16 @@ static inline int ring_space(struct intel_ring_buffer *ring)
|
||||
return space;
|
||||
}
|
||||
|
||||
void __intel_ring_advance(struct intel_ring_buffer *ring)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = ring->dev->dev_private;
|
||||
|
||||
ring->tail &= ring->size - 1;
|
||||
if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring))
|
||||
return;
|
||||
ring->write_tail(ring, ring->tail);
|
||||
}
|
||||
|
||||
static int
|
||||
gen2_render_ring_flush(struct intel_ring_buffer *ring,
|
||||
u32 invalidate_domains,
|
||||
@@ -631,7 +641,7 @@ gen6_add_request(struct intel_ring_buffer *ring)
|
||||
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
|
||||
intel_ring_emit(ring, ring->outstanding_lazy_seqno);
|
||||
intel_ring_emit(ring, MI_USER_INTERRUPT);
|
||||
intel_ring_advance(ring);
|
||||
__intel_ring_advance(ring);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -744,7 +754,7 @@ pc_render_add_request(struct intel_ring_buffer *ring)
|
||||
intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
|
||||
intel_ring_emit(ring, ring->outstanding_lazy_seqno);
|
||||
intel_ring_emit(ring, 0);
|
||||
intel_ring_advance(ring);
|
||||
__intel_ring_advance(ring);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -965,7 +975,7 @@ i9xx_add_request(struct intel_ring_buffer *ring)
|
||||
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
|
||||
intel_ring_emit(ring, ring->outstanding_lazy_seqno);
|
||||
intel_ring_emit(ring, MI_USER_INTERRUPT);
|
||||
intel_ring_advance(ring);
|
||||
__intel_ring_advance(ring);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1414,6 +1424,9 @@ static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
|
||||
if (ret != -ENOSPC)
|
||||
return ret;
|
||||
|
||||
/* force the tail write in case we have been skipping them */
|
||||
__intel_ring_advance(ring);
|
||||
|
||||
trace_i915_ring_wait_begin(ring);
|
||||
/* With GEM the hangcheck timer should kick us out of the loop,
|
||||
* leaving it early runs the risk of corrupting GEM state (due
|
||||
@@ -1568,17 +1581,6 @@ void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno)
|
||||
ring->hangcheck.seqno = seqno;
|
||||
}
|
||||
|
||||
void intel_ring_advance(struct intel_ring_buffer *ring)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = ring->dev->dev_private;
|
||||
|
||||
ring->tail &= ring->size - 1;
|
||||
if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring))
|
||||
return;
|
||||
ring->write_tail(ring, ring->tail);
|
||||
}
|
||||
|
||||
|
||||
static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
|
||||
u32 value)
|
||||
{
|
||||
|
Reference in New Issue
Block a user