From 22c01cc48301f6974868bd4a7b03e29883da1103 Mon Sep 17 00:00:00 2001 From: Anatoli Antonovitch <anatoli.antonovitch@amd.com> Date: Thu, 3 Sep 2015 11:13:31 -0400 Subject: drm/amdgpu: execution barrier after fence v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Insert wait for reg mem after EOP to fix potential issue with vm context switch v2: move wait to vm_flush() use equal instead of greater than. Signed-off-by: Anatoli Antonovitch <anatoli.antonovitch@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 53f07439a512..0f979abca235 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3965,6 +3965,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); + } /** @@ -4044,6 +4045,17 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq[ring->idx]; + uint64_t addr = ring->fence_drv.gpu_addr; + + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ + WAIT_REG_MEM_FUNCTION(3))); /* equal */ + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, 4); /* poll interval */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | -- cgit v1.2.3 From 20a85ff846ffed84fba8637abbb6b1c96436c0ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> Date: Sat, 5 Sep 2015 11:59:50 +0200 Subject: drm/amdgpu: use write confirm for vm_flush() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the CP waits for the write to be confirmed before invalidating. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0f979abca235..d6d330bc6daa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4059,7 +4059,8 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | - WRITE_DATA_DST_SEL(0))); + WRITE_DATA_DST_SEL(0)) | + WR_CONFIRM); if (vm_id < 8) { amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); -- cgit v1.2.3 From 72d7668b5ba5180b651e8a07dd6ed62e4e26f207 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> Date: Thu, 3 Sep 2015 17:34:59 +0200 Subject: drm/amdgpu: export reservation_object from dmabuf to ttm (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an extra argument to amdgpu_bo_create, which is only used in amdgpu_prime.c. Port of radeon commit 831b6966a60fe72d85ae3576056b4e4e0775b112. v2: fix up kfd. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index d6d330bc6daa..78e5900d71cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -868,7 +868,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); if (r) { dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); @@ -995,21 +995,21 @@ static int gfx_v8_0_sw_init(void *handle) /* reserve GDS, GWS and OA resource for gfx */ r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GDS, 0, + AMDGPU_GEM_DOMAIN_GDS, 0, NULL, NULL, &adev->gds.gds_gfx_bo); if (r) return r; r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GWS, 0, + AMDGPU_GEM_DOMAIN_GWS, 0, NULL, NULL, &adev->gds.gws_gfx_bo); if (r) return r; r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_OA, 0, + AMDGPU_GEM_DOMAIN_OA, 0, NULL, NULL, &adev->gds.oa_gfx_bo); if (r) return r; @@ -3106,7 +3106,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) sizeof(struct vi_mqd), PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - &ring->mqd_obj); + NULL, &ring->mqd_obj); if (r) { dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); return r; -- cgit v1.2.3 From 5c3422b0b135b46c8dca9c1d909c1ae84f3561bd Mon Sep 17 00:00:00 2001 From: "monk.liu" <monk.liu@amd.com> Date: Wed, 23 Sep 2015 13:49:58 +0800 Subject: drm/amdgpu: sync ce and me with SWITCH_BUFFER(2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit we used to adopt wait_reg_mem to let CE wait before DE finish page updating, but from Tonga+, CE doesn't support wait_reg_mem package so this logic no longer works. so here is another approach to do same thing: Insert two of SWITCH_BUFFER at both front and end of vm_flush can guarantee that CE not go further to process IB_const before vm_flush done. Insert two of SWITCH_BUFFER also works on CI, so remove legency method to sync CE and ME v2: Insert double SWITCH_BUFFER at front of vm flush as well. Signed-off-by: monk.liu <monk.liu@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 58 ++++++++--------------------------- 1 file changed, 12 insertions(+), 46 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 78e5900d71cd..cb4f68f53f24 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -940,12 +940,6 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs); - if (r) { - DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r); - return r; - } - /* set up the gfx ring */ for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; @@ -1033,8 +1027,6 @@ static int gfx_v8_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_wb_free(adev, adev->gfx.ce_sync_offs); - gfx_v8_0_mec_fini(adev); return 0; @@ -4006,41 +3998,6 @@ static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring, return true; } -static void gfx_v8_0_ce_sync_me(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4; - - /* instruct DE to set a magic number */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(5))); - amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); - amdgpu_ring_write(ring, 1); - - /* let CE wait till condition satisfied */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); - amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ - WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ - WAIT_REG_MEM_FUNCTION(3) | /* == */ - WAIT_REG_MEM_ENGINE(2))); /* ce */ - amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); - amdgpu_ring_write(ring, 1); - amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, 4); /* poll interval */ - - /* instruct CE to reset wb of ce_sync to zero */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | - WRITE_DATA_DST_SEL(5) | - WR_CONFIRM)); - amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); - amdgpu_ring_write(ring, 0); -} - static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { @@ -4057,6 +4014,14 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0xffffffff); amdgpu_ring_write(ring, 4); /* poll interval */ + if (usepfp) { + /* synce CE with ME to prevent CE fetch CEIB before context switch done */ + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0)) | @@ -4096,9 +4061,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* sync PFP to ME, otherwise we might get invalid PFP reads */ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); - - /* synce CE with ME to prevent CE fetch CEIB before context switch done */ - gfx_v8_0_ce_sync_me(ring); + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); } } -- cgit v1.2.3