drm/amdgpu: apply AMDGPU_IB_FLAG_EMIT_MEM_SYNC to compute IBs too (v3)
Compute IBs need this too.

v2: split out version bump
v3: squash in emit frame count fixes

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
2f9ce2a386
commit
d35745bbec
@@ -450,7 +450,7 @@
|
|||||||
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
|
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
|
||||||
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
|
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
|
||||||
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
|
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
|
||||||
#define PACKET3_AQUIRE_MEM 0x58
|
#define PACKET3_ACQUIRE_MEM 0x58
|
||||||
#define PACKET3_REWIND 0x59
|
#define PACKET3_REWIND 0x59
|
||||||
#define PACKET3_LOAD_UCONFIG_REG 0x5E
|
#define PACKET3_LOAD_UCONFIG_REG 0x5E
|
||||||
#define PACKET3_LOAD_SH_REG 0x5F
|
#define PACKET3_LOAD_SH_REG 0x5F
|
||||||
|
|||||||
@@ -8133,7 +8133,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
|
|||||||
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
|
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
|
||||||
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
|
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
|
||||||
2 + /* gfx_v10_0_ring_emit_vm_flush */
|
2 + /* gfx_v10_0_ring_emit_vm_flush */
|
||||||
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
|
8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
|
||||||
|
8, /* gfx_v10_0_emit_mem_sync */
|
||||||
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
|
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
|
||||||
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
|
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
|
||||||
.emit_fence = gfx_v10_0_ring_emit_fence,
|
.emit_fence = gfx_v10_0_ring_emit_fence,
|
||||||
@@ -8148,6 +8149,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
|
|||||||
.emit_wreg = gfx_v10_0_ring_emit_wreg,
|
.emit_wreg = gfx_v10_0_ring_emit_wreg,
|
||||||
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
|
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
|
||||||
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
|
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
|
||||||
|
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
|
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
|
||||||
|
|||||||
@@ -3533,7 +3533,8 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
|
|||||||
5 + 5 + /* hdp flush / invalidate */
|
5 + 5 + /* hdp flush / invalidate */
|
||||||
7 + /* gfx_v6_0_ring_emit_pipeline_sync */
|
7 + /* gfx_v6_0_ring_emit_pipeline_sync */
|
||||||
SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */
|
SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */
|
||||||
14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
|
14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
|
||||||
|
5, /* SURFACE_SYNC */
|
||||||
.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
|
.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
|
||||||
.emit_ib = gfx_v6_0_ring_emit_ib,
|
.emit_ib = gfx_v6_0_ring_emit_ib,
|
||||||
.emit_fence = gfx_v6_0_ring_emit_fence,
|
.emit_fence = gfx_v6_0_ring_emit_fence,
|
||||||
@@ -3543,6 +3544,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
|
|||||||
.test_ib = gfx_v6_0_ring_test_ib,
|
.test_ib = gfx_v6_0_ring_test_ib,
|
||||||
.insert_nop = amdgpu_ring_insert_nop,
|
.insert_nop = amdgpu_ring_insert_nop,
|
||||||
.emit_wreg = gfx_v6_0_ring_emit_wreg,
|
.emit_wreg = gfx_v6_0_ring_emit_wreg,
|
||||||
|
.emit_mem_sync = gfx_v6_0_emit_mem_sync,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)
|
static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)
|
||||||
|
|||||||
@@ -5010,6 +5010,20 @@ static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring)
|
|||||||
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
|
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
|
||||||
|
{
|
||||||
|
amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
|
||||||
|
amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
|
||||||
|
PACKET3_TC_ACTION_ENA |
|
||||||
|
PACKET3_SH_KCACHE_ACTION_ENA |
|
||||||
|
PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
|
||||||
|
amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
|
||||||
|
amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
|
||||||
|
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
|
||||||
|
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
|
||||||
|
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
|
||||||
|
}
|
||||||
|
|
||||||
static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
|
static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
|
||||||
.name = "gfx_v7_0",
|
.name = "gfx_v7_0",
|
||||||
.early_init = gfx_v7_0_early_init,
|
.early_init = gfx_v7_0_early_init,
|
||||||
@@ -5075,7 +5089,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
|
|||||||
5 + /* hdp invalidate */
|
5 + /* hdp invalidate */
|
||||||
7 + /* gfx_v7_0_ring_emit_pipeline_sync */
|
7 + /* gfx_v7_0_ring_emit_pipeline_sync */
|
||||||
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
|
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
|
||||||
7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
|
7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
|
||||||
|
7, /* gfx_v7_0_emit_mem_sync_compute */
|
||||||
.emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
|
.emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
|
||||||
.emit_ib = gfx_v7_0_ring_emit_ib_compute,
|
.emit_ib = gfx_v7_0_ring_emit_ib_compute,
|
||||||
.emit_fence = gfx_v7_0_ring_emit_fence_compute,
|
.emit_fence = gfx_v7_0_ring_emit_fence_compute,
|
||||||
@@ -5088,6 +5103,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
|
|||||||
.insert_nop = amdgpu_ring_insert_nop,
|
.insert_nop = amdgpu_ring_insert_nop,
|
||||||
.pad_ib = amdgpu_ring_generic_pad_ib,
|
.pad_ib = amdgpu_ring_generic_pad_ib,
|
||||||
.emit_wreg = gfx_v7_0_ring_emit_wreg,
|
.emit_wreg = gfx_v7_0_ring_emit_wreg,
|
||||||
|
.emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
|
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
|
||||||
|
|||||||
@@ -6830,6 +6830,21 @@ static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
|
|||||||
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
|
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
|
||||||
|
{
|
||||||
|
amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
|
||||||
|
amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
|
||||||
|
PACKET3_TC_ACTION_ENA |
|
||||||
|
PACKET3_SH_KCACHE_ACTION_ENA |
|
||||||
|
PACKET3_SH_ICACHE_ACTION_ENA |
|
||||||
|
PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
|
||||||
|
amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
|
||||||
|
amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
|
||||||
|
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
|
||||||
|
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
|
||||||
|
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
|
||||||
|
}
|
||||||
|
|
||||||
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
|
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
|
||||||
.name = "gfx_v8_0",
|
.name = "gfx_v8_0",
|
||||||
.early_init = gfx_v8_0_early_init,
|
.early_init = gfx_v8_0_early_init,
|
||||||
@@ -6912,7 +6927,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
|
|||||||
5 + /* hdp_invalidate */
|
5 + /* hdp_invalidate */
|
||||||
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
|
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
|
||||||
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
|
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
|
||||||
7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
|
7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
|
||||||
|
7, /* gfx_v8_0_emit_mem_sync_compute */
|
||||||
.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
|
.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
|
||||||
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
|
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
|
||||||
.emit_fence = gfx_v8_0_ring_emit_fence_compute,
|
.emit_fence = gfx_v8_0_ring_emit_fence_compute,
|
||||||
@@ -6925,6 +6941,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
|
|||||||
.insert_nop = amdgpu_ring_insert_nop,
|
.insert_nop = amdgpu_ring_insert_nop,
|
||||||
.pad_ib = amdgpu_ring_generic_pad_ib,
|
.pad_ib = amdgpu_ring_generic_pad_ib,
|
||||||
.emit_wreg = gfx_v8_0_ring_emit_wreg,
|
.emit_wreg = gfx_v8_0_ring_emit_wreg,
|
||||||
|
.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
|
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
|
||||||
|
|||||||
@@ -6741,7 +6741,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
|
|||||||
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
|
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
|
||||||
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
|
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
|
||||||
2 + /* gfx_v9_0_ring_emit_vm_flush */
|
2 + /* gfx_v9_0_ring_emit_vm_flush */
|
||||||
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
|
8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
|
||||||
|
7, /* gfx_v9_0_emit_mem_sync */
|
||||||
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
|
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
|
||||||
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
|
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
|
||||||
.emit_fence = gfx_v9_0_ring_emit_fence,
|
.emit_fence = gfx_v9_0_ring_emit_fence,
|
||||||
@@ -6756,6 +6757,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
|
|||||||
.emit_wreg = gfx_v9_0_ring_emit_wreg,
|
.emit_wreg = gfx_v9_0_ring_emit_wreg,
|
||||||
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
|
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
|
||||||
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
|
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
|
||||||
|
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
|
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
|
||||||
|
|||||||
@@ -332,7 +332,7 @@
|
|||||||
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
|
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
|
||||||
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
|
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
|
||||||
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
|
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
|
||||||
#define PACKET3_AQUIRE_MEM 0x58
|
#define PACKET3_ACQUIRE_MEM 0x58
|
||||||
#define PACKET3_REWIND 0x59
|
#define PACKET3_REWIND 0x59
|
||||||
#define PACKET3_LOAD_UCONFIG_REG 0x5E
|
#define PACKET3_LOAD_UCONFIG_REG 0x5E
|
||||||
#define PACKET3_LOAD_SH_REG 0x5F
|
#define PACKET3_LOAD_SH_REG 0x5F
|
||||||
|
|||||||
Reference in New Issue
Block a user