summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
diff options
context:
space:
mode:
authorSunil Khatri <sunil.khatri@amd.com>2024-07-31 13:39:20 +0530
committerAlex Deucher <alexander.deucher@amd.com>2024-08-06 10:43:49 -0400
commit847e387e00547b0cc728a5e61f5beb2ff861ed1d (patch)
tree3b2d5660c5ca0099c5f7f06bdb39040311dfa388 /drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
parentb41a382932263b2951bc9e83a22168d579a94865 (diff)
downloadlinux-847e387e00547b0cc728a5e61f5beb2ff861ed1d.tar.gz
linux-847e387e00547b0cc728a5e61f5beb2ff861ed1d.tar.bz2
linux-847e387e00547b0cc728a5e61f5beb2ff861ed1d.zip
drm/amdgpu: optimize the padding for gfx11
Adding NOP packets one by one in the ring does not use the CP efficiently. Solution: Use the CP optimization while adding NOP packets so that the PFP can discard NOP packets based on the count given in the header instead of fetching all NOP packets one by one. Reviewed-by: Christian König <christian.koenig@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Cc: Tvrtko Ursulin <tursulin@igalia.com> Cc: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c22
1 files changed, 20 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 4a9766635933..e7c160b9d0fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -481,6 +481,24 @@ static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
amdgpu_ring_write(ring, inv); /* poll interval */
}
+/**
+ * gfx_v11_ring_insert_nop - pad the ring with NOPs behind one counted header
+ * @ring: ring to write the padding into
+ * @num_nop: total number of amdgpu_ring_write() calls to issue, header included
+ *
+ * Instead of emitting @num_nop independent NOP values, the first write is a
+ * PACKET3_NOP header carrying a count, so that the PFP can discard the
+ * padding based on that count rather than fetching every NOP one by one.
+ * The count passed to PACKET3() is capped at 0x3ffe; padding beyond what the
+ * header covers is still written as plain ring->funcs->nop values.
+ *
+ * Issues exactly @num_nop writes; nothing is written when @num_nop is 0.
+ */
+static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+	uint32_t i;
+
+	/*
+	 * Nothing to pad. Without this check the unsigned "num_nop - 2" below
+	 * would wrap around, get clamped to 0x3ffe, and a lone header with
+	 * that count would be written with no padding behind it.
+	 */
+	if (num_nop == 0)
+		return;
+
+	/* Header itself is a NOP packet */
+	if (num_nop == 1) {
+		amdgpu_ring_write(ring, ring->funcs->nop);
+		return;
+	}
+
+	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+	/* Header is at index 0, followed by num_nop - 1 NOP packets */
+	for (i = 1; i < num_nop; i++)
+		amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -6709,7 +6727,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
.test_ring = gfx_v11_0_ring_test_ring,
.test_ib = gfx_v11_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v11_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
@@ -6751,7 +6769,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
.test_ring = gfx_v11_0_ring_test_ring,
.test_ib = gfx_v11_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v11_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v11_0_ring_emit_wreg,
.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,