diff options
author | Sunil Khatri <sunil.khatri@amd.com> | 2024-04-16 16:14:12 +0530 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-04-26 17:22:39 -0400 |
commit | c395dbb68b294d1de9a5ac6c9faaf8ac081123c3 (patch) | |
tree | 611975e7db59412a27ef49d3b977720ae229e91f /drivers/gpu/drm/amd/amdgpu | |
parent | e21d253bd74bd422347d202ea2205cdc7623eed2 (diff) | |
download | linux-c395dbb68b294d1de9a5ac6c9faaf8ac081123c3.tar.gz linux-c395dbb68b294d1de9a5ac6c9faaf8ac081123c3.tar.bz2 linux-c395dbb68b294d1de9a5ac6c9faaf8ac081123c3.zip |
drm/amdgpu: add support of gfx10 register dump
Adding gfx10 gc registers to be used for register
dump via devcoredump during a gpu reset.
Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 130 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/soc15.h | 2 |
4 files changed, 143 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e0d7f4ee7e16..cac0ca64367b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -139,6 +139,14 @@ enum amdgpu_ss { AMDGPU_SS_DRV_UNLOAD }; +struct amdgpu_hwip_reg_entry { + u32 hwip; + u32 inst; + u32 seg; + u32 reg_offset; + const char *reg_name; +}; + struct amdgpu_watchdog_timer { bool timeout_fatal_disable; uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 04a86dff71e6..64f197bbc866 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -433,6 +433,10 @@ struct amdgpu_gfx { uint32_t num_xcc_per_xcp; struct mutex partition_mutex; bool mcbp; /* mid command buffer preemption */ + + /* IP reg dump */ + uint32_t *ip_dump; + uint32_t reg_count; }; struct amdgpu_gfx_ras_reg_entry { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 29384a2c03ec..c0cc3dd32f46 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -276,6 +276,99 @@ MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin"); +static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS3), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HPD_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS_2), + SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQG_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_3), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_4), + SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_GPM_STAT_2), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SPP_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST) +}; + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), @@ -4490,6 +4583,22 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio, NULL); } +static void gfx_v10_0_alloc_dump_mem(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); + uint32_t *ptr; + + ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for IP Dump\n"); + adev->gfx.ip_dump = NULL; + adev->gfx.reg_count = 0; + } else { + adev->gfx.ip_dump = ptr; + adev->gfx.reg_count = reg_count; + } +} + static int gfx_v10_0_sw_init(void *handle) { int i, j, k, r, ring_id = 0; @@ -4642,6 +4751,8 @@ static int gfx_v10_0_sw_init(void *handle) gfx_v10_0_gpu_early_init(adev); + gfx_v10_0_alloc_dump_mem(adev); + return 0; } @@ -4694,6 +4805,8 @@ static int gfx_v10_0_sw_fini(void *handle) gfx_v10_0_free_microcode(adev); + kfree(adev->gfx.ip_dump); + return 0; } @@ -9154,6 +9267,21 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v10_ip_dump(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); + + if (!adev->gfx.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i])); + amdgpu_gfx_off_ctrl(adev, true); +} + static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .name = "gfx_v10_0", .early_init = gfx_v10_0_early_init, @@ -9170,7 +9298,7 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .set_clockgating_state = gfx_v10_0_set_clockgating_state, .set_powergating_state = gfx_v10_0_set_powergating_state, .get_clockgating_state = gfx_v10_0_get_clockgating_state, - .dump_ip_state = NULL, + .dump_ip_state = gfx_v10_ip_dump, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 1444b7765e4b..282584a48be0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -88,6 +88,8 @@ struct soc15_ras_field_entry { }; #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg +#define SOC15_REG_ENTRY_STR(ip, inst, reg) \ + { ip##_HWIP, inst, reg##_BASE_IDX, reg, #reg } #define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) |