diff options
author | Tao Zhou <tao.zhou1@amd.com> | 2019-08-13 15:46:03 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2019-08-15 10:51:50 -0500 |
commit | d6e0cbb152b35833a26772c86b338d8297ce609d (patch) | |
tree | badd7cbe79e7247e0ba795db8d2775506c35ffa6 /drivers/gpu/drm/amd/amdgpu | |
parent | f0f50dcfd427f9f4a179048f4b56bff176bacc94 (diff) | |
download | linux-d6e0cbb152b35833a26772c86b338d8297ce609d.tar.gz linux-d6e0cbb152b35833a26772c86b338d8297ce609d.tar.bz2 linux-d6e0cbb152b35833a26772c86b338d8297ce609d.zip |
drm/amdgpu: implement querying ras error count for mmhub
get mmhub ea ras error count by accessing EDC_CNT register
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index c3a98d964ce5..2bd7ada80088 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -21,11 +21,13 @@ * */ #include "amdgpu.h" +#include "amdgpu_ras.h" #include "mmhub_v1_0.h" #include "mmhub/mmhub_1_0_offset.h" #include "mmhub/mmhub_1_0_sh_mask.h" #include "mmhub/mmhub_1_0_default.h" +#include "mmhub/mmhub_9_4_0_offset.h" #include "vega10_enum.h" #include "soc15_common.h" @@ -33,6 +35,9 @@ #define mmDAGB0_CNTL_MISC2_RV 0x008f #define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0 +#define EA_EDC_CNT_MASK 0x3 +#define EA_EDC_CNT_SHIFT 0x2 + u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) { u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); @@ -557,6 +562,56 @@ void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { + int i; + uint32_t ea0_edc_cnt, ea0_edc_cnt2; + uint32_t ea1_edc_cnt, ea1_edc_cnt2; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + /* EDC CNT will be cleared automatically after read */ + ea0_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT_VG20); + ea0_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20); + ea1_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT_VG20); + ea1_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20); + + /* error count of each error type is recorded by 2 bits, + * ce and ue count in EDC_CNT + */ + for (i = 0; i < 5; i++) { + err_data->ce_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); + err_data->ce_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); + ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; + err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); + ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; + } + /* successive ue count in EDC_CNT */ + for (i = 0; i < 5; i++) { + err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); + ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; + } + + /* ce and ue count in EDC_CNT2 */ + for (i = 0; i < 3; i++) { + err_data->ce_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); + err_data->ce_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); + ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); + ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + } + /* successive ue count in EDC_CNT2 */ + for (i = 0; i < 6; i++) { + err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); + ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + } } const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = { |