diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 532 |
1 files changed, 413 insertions, 119 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ba67d1023264..9b413f6fa588 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -20,20 +20,26 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + +#include <linux/delay.h> #include <linux/kernel.h> #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_gfx.h" #include "soc15.h" #include "soc15d.h" #include "amdgpu_atomfirmware.h" +#include "amdgpu_pm.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" #include "vega10_enum.h" #include "hdp/hdp_4_0_offset.h" +#include "soc15.h" #include "soc15_common.h" #include "clearstate_gfx9.h" #include "v9_structs.h" @@ -96,6 +102,7 @@ MODULE_FIRMWARE("amdgpu/raven2_me.bin"); MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); +MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); static const struct soc15_reg_golden golden_settings_gc_9_0[] = { @@ -302,17 +309,20 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); +static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - soc15_program_register_sequence(adev, - golden_settings_gc_9_0, - ARRAY_SIZE(golden_settings_gc_9_0)); - soc15_program_register_sequence(adev, - golden_settings_gc_9_0_vg10, - ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + if (!amdgpu_virt_support_skip_setting(adev)) { + soc15_program_register_sequence(adev, + golden_settings_gc_9_0, + ARRAY_SIZE(golden_settings_gc_9_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_9_0_vg10, + ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + } break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -417,7 +427,7 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -588,7 +598,8 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) case CHIP_RAVEN: if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) break; - if ((adev->gfx.rlc_fw_version < 531) || + if ((adev->gfx.rlc_fw_version != 106 && + adev->gfx.rlc_fw_version < 531) || (adev->gfx.rlc_fw_version == 53815) || (adev->gfx.rlc_feature_version < 1) || !adev->gfx.rlc.is_rlc_v2_1) @@ -612,6 +623,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) unsigned int i = 0; uint16_t version_major; uint16_t version_minor; + uint32_t smu_version; DRM_DEBUG("\n"); @@ -682,6 +694,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); + else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && + (smu_version >= 0x41e2b)) + /** + *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. + */ + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); else snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); @@ -1458,8 +1476,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) /* GDS reserve memory: 64 bytes alignment */ adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); - adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; - adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; + adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); @@ -1567,7 +1584,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) gfx_v9_0_write_data_to_reg(ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), - (adev->gds.mem.total_size + + (adev->gds.gds_size + adev->gfx.ngg.gds_reserve_size)); amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); @@ -1708,7 +1725,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); + &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); if (r) return r; } @@ -1744,7 +1761,7 @@ static int gfx_v9_0_sw_init(void *handle) return r; /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); if (r) return r; @@ -1781,24 +1798,18 @@ static int gfx_v9_0_sw_fini(void *handle) kfree(ras_if); } - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_gfx_compute_mqd_sw_fini(adev); + amdgpu_gfx_mqd_sw_fini(adev); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); amdgpu_gfx_kiq_fini(adev); gfx_v9_0_mec_fini(adev); gfx_v9_0_ngg_fini(adev); - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); + amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); if (adev->asic_type == CHIP_RAVEN) { amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, &adev->gfx.rlc.cp_table_gpu_addr, @@ -1834,7 +1845,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh else data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); - WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); } static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) @@ -1902,8 +1913,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); } soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -1914,7 +1925,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) u32 tmp; int i; - WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v9_0_tiling_mode_table_init(adev); @@ -1931,17 +1942,17 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) if (i == 0) { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); } else { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, (adev->gmc.private_aperture_start >> 48)); tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, (adev->gmc.shared_aperture_start >> 48)); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); } } soc15_grbm_select(adev, 0, 0, 0, 0); @@ -1957,7 +1968,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) */ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, + WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE, (adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | (adev->gfx.config.sc_prim_fifo_size_backend << @@ -2024,11 +2035,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_0_init_csb(struct amdgpu_device *adev) { /* csib */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), adev->gfx.rlc.clear_state_size); } @@ -2498,7 +2509,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].sched.ready = false; } - WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); udelay(50); } @@ -2696,9 +2707,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) int i; if (enable) { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); } else { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, + WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) adev->gfx.compute_ring[i].sched.ready = false; @@ -2759,9 +2770,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) @@ -2979,67 +2990,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) /* disable wptr polling */ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); /* enable doorbell? */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) break; udelay(1); } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); } /* set the pointer to the MQD */ - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); /* set MQD vmid to 0 */ - WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, mqd->cp_mqd_control); /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); /* set up the HQD, this is similar to CP_RB0_CNTL */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); /* set the wb address whether it's enabled or not */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi); /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); /* enable the doorbell if requested */ @@ -3050,23 +3061,23 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) (adev->doorbell_index.userqueue_end * 2) << 2); } - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); /* set the vmid for the queue */ - WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); /* activate the queue */ - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); if (ring->use_doorbell) @@ -3083,7 +3094,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) @@ -3095,21 +3106,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) DRM_DEBUG("KIQ dequeue request failed.\n"); /* Manual disable if dequeue request times out */ - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0); } - WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); return 0; } @@ -3529,6 +3540,279 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); } +static const u32 vgpr_init_compute_shader[] = +{ + 0xb07c0000, 0xbe8000ff, + 0x000000f8, 0xbf110800, + 0x7e000280, 0x7e020280, + 0x7e040280, 0x7e060280, + 0x7e080280, 0x7e0a0280, + 0x7e0c0280, 0x7e0e0280, + 0x80808800, 0xbe803200, + 0xbf84fff5, 0xbf9c0000, + 0xd28c0001, 0x0001007f, + 0xd28d0001, 0x0002027e, + 0x10020288, 0xb8810904, + 0xb7814000, 0xd1196a01, + 0x00000301, 0xbe800087, + 0xbefc00c1, 0xd89c4000, + 0x00020201, 0xd89cc080, + 0x00040401, 0x320202ff, + 0x00000800, 0x80808100, + 0xbf84fff8, 0x7e020280, + 0xbf810000, 0x00000000, +}; + +static const u32 sgpr_init_compute_shader[] = +{ + 0xb07c0000, 0xbe8000ff, + 0x0000005f, 0xbee50080, + 0xbe812c65, 0xbe822c65, + 0xbe832c65, 0xbe842c65, + 0xbe852c65, 0xb77c0005, + 0x80808500, 0xbf84fff8, + 0xbe800080, 0xbf810000, +}; + +static const struct soc15_reg_entry vgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ +}; + +static const struct soc15_reg_entry sgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, +}; + +static const struct soc15_reg_entry sec_ded_counter_registers[] = { + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, + { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, +}; + +static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; + int i, r; + + r = amdgpu_ring_alloc(ring, 7); + if (r) { + DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", + ring->name, r); + return r; + } + + WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); + WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); + + amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); + amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | + PACKET3_DMA_DATA_DST_SEL(1) | + PACKET3_DMA_DATA_SRC_SEL(2) | + PACKET3_DMA_DATA_ENGINE(0))); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | + adev->gds.gds_size); + + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); + + return r; +} + +static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + int r, i, j, k; + unsigned total_size, vgpr_offset, sgpr_offset; + u64 gpu_addr; + + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + + /* bail if the compute ring is not ready */ + if (!ring->sched.ready) + return 0; + + total_size = + ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + total_size += + ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + total_size = ALIGN(total_size, 256); + vgpr_offset = total_size; + total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); + sgpr_offset = total_size; + total_size += sizeof(sgpr_init_compute_shader); + + /* allocate an indirect buffer to put the commands in */ + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, total_size, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + return r; + } + + /* load the compute shaders */ + for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) + ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; + + for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) + ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; + + /* init the ib length to 0 */ + ib.length_dw = 0; + + /* VGPR */ + /* write the register state for the compute dispatch */ + for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* SGPR */ + /* write the register state for the compute dispatch */ + for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* shedule the ib on the ring */ + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) { + DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); + goto fail; + } + + /* wait for the GPU to finish processing the IB */ + r = dma_fence_wait(f, false); + if (r) { + DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); + goto fail; + } + + /* read back registers to clear the counters */ + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { + for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { + for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { + gfx_v9_0_select_se_sh(adev, j, 0x0, k); + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + } + } + } + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + mutex_unlock(&adev->grbm_idx_mutex); + +fail: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); + + return r; +} + static int gfx_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3570,8 +3854,35 @@ static int gfx_v9_0_ecc_late_init(void *handle) return 0; } - if (*ras_if) + r = gfx_v9_0_do_edc_gds_workarounds(adev); + if (r) + return r; + + /* requires IBs so do in late init after IB pool is initialized */ + r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (r) + return r; + + /* handle resume path. */ + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__GFX); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. */ goto resume; + } *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); if (!*ras_if) @@ -3580,8 +3891,14 @@ static int gfx_v9_0_ecc_late_init(void *handle) **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__GFX); + r = 0; + } goto feature; + } ih_info.head = **ras_if; fs_info.head = **ras_if; @@ -3590,9 +3907,7 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (r) goto interrupt; - r = amdgpu_ras_debugfs_create(adev, &fs_info); - if (r) - goto debugfs; + amdgpu_ras_debugfs_create(adev, &fs_info); r = amdgpu_ras_sysfs_create(adev, &fs_info); if (r) @@ -3607,14 +3922,13 @@ irq: amdgpu_ras_sysfs_remove(adev, *ras_if); sysfs: amdgpu_ras_debugfs_remove(adev, *ras_if); -debugfs: amdgpu_ras_interrupt_remove_handler(adev, &ih_info); interrupt: amdgpu_ras_feature_enable(adev, *ras_if, 0); feature: kfree(*ras_if); *ras_if = NULL; - return -EINVAL; + return r; } static int gfx_v9_0_late_init(void *handle) @@ -4268,7 +4582,7 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, struct amdgpu_ring *iring; mutex_lock(&adev->gfx.pipe_reserve_mutex); - pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); if (acquire) set_bit(pipe, adev->gfx.pipe_reserve_bitmap); else @@ -4287,20 +4601,20 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, /* Lower all pipes without a current reservation */ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { iring = &adev->gfx.gfx_ring[i]; - pipe = amdgpu_gfx_queue_to_bit(adev, - iring->me, - iring->pipe, - 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); gfx_v9_0_ring_set_pipe_percent(iring, reserve); } for (i = 0; i < adev->gfx.num_compute_rings; ++i) { iring = &adev->gfx.compute_ring[i]; - pipe = amdgpu_gfx_queue_to_bit(adev, - iring->me, - iring->pipe, - 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); gfx_v9_0_ring_set_pipe_percent(iring, reserve); } @@ -4319,8 +4633,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); - WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -4715,7 +5029,7 @@ static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { switch (type) { - case AMDGPU_CP_IRQ_GFX_EOP: + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: @@ -5056,13 +5370,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: - adev->gds.mem.total_size = 0x10000; + adev->gds.gds_size = 0x10000; break; case CHIP_RAVEN: - adev->gds.mem.total_size = 0x1000; + adev->gds.gds_size = 0x1000; break; default: - adev->gds.mem.total_size = 0x10000; + adev->gds.gds_size = 0x10000; break; } @@ -5086,28 +5400,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) break; } - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; } static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, |