drm/vc4: Fix races when the CS reads from render targets.

With the introduction of bin/render pipelining, the previous job may
not have completed when we start binning the next one.  If the previous
job wrote to our vertex buffer (VBO), index buffer (IB), or coordinate
shader (CS) textures, then the binning stage might read stale or
uninitialized results.

Fixes the major rendering failure in glmark2 -b terrain.

Signed-off-by: Eric Anholt <eric@anholt.net>
Fixes: ca26d28bba ("drm/vc4: improve throughput by pipelining binning and rendering jobs")
Cc: stable@vger.kernel.org
This commit is contained in:
Eric Anholt
2016-09-27 09:03:13 -07:00
parent 57b9f56944
commit 7edabee06a
4 changed files with 62 additions and 8 deletions

View File

@@ -267,6 +267,9 @@ validate_indexed_prim_list(VALIDATE_ARGS)
if (!ib)
return -EINVAL;
exec->bin_dep_seqno = max(exec->bin_dep_seqno,
to_vc4_bo(&ib->base)->write_seqno);
if (offset > ib->base.size ||
(ib->base.size - offset) / index_size < length) {
DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
@@ -555,8 +558,7 @@ static bool
reloc_tex(struct vc4_exec_info *exec,
void *uniform_data_u,
struct vc4_texture_sample_info *sample,
uint32_t texture_handle_index)
uint32_t texture_handle_index, bool is_cs)
{
struct drm_gem_cma_object *tex;
uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
@@ -714,6 +716,11 @@ reloc_tex(struct vc4_exec_info *exec,
*validated_p0 = tex->paddr + p0;
if (is_cs) {
exec->bin_dep_seqno = max(exec->bin_dep_seqno,
to_vc4_bo(&tex->base)->write_seqno);
}
return true;
fail:
DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
@@ -835,7 +842,8 @@ validate_gl_shader_rec(struct drm_device *dev,
if (!reloc_tex(exec,
uniform_data_u,
&validated_shader->texture_samples[tex],
texture_handles_u[tex])) {
texture_handles_u[tex],
i == 2)) {
return -EINVAL;
}
}
@@ -867,6 +875,9 @@ validate_gl_shader_rec(struct drm_device *dev,
uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
uint32_t max_index;
exec->bin_dep_seqno = max(exec->bin_dep_seqno,
to_vc4_bo(&vbo->base)->write_seqno);
if (state->addr & 0x8)
stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;