drm/amdgpu: add RAS error count reset for gfx_v9_4_3

Add GFX RAS error count reset function.

v2: remove xcp operation.
    only call select_se_sh when the instance number is more than 1.
v3: add check for se_num before select_se_sh.
    change instance from 0 to xcc_id for register access.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
author: Tao Zhou <tao.zhou1@amd.com> 2023-02-08 14:54:01 +0800
committer: Alex Deucher <alexander.deucher@amd.com> 2023-06-09 10:37:44 -0400
commit: 30feef0676092bdb4b8697e68b8d5864d54f096f (patch)
tree: 7c0e3e05a3b489aa54782e114aa1790b6044078f /drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
parent: bfa84da6185cb1897fcee0ac3815625d162d39f0 (diff)
download: linux-30feef0676092bdb4b8697e68b8d5864d54f096f.tar.gz
linux-30feef0676092bdb4b8697e68b8d5864d54f096f.tar.bz2
linux-30feef0676092bdb4b8697e68b8d5864d54f096f.zip
1 files changed, 38 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index bfd041ba51d6..ac5270d5eff4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -3773,6 +3773,39 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
 	err_data->ue_count += ue_count;
 }
 
+static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
+					void *ras_error_status, int xcc_id)
+{
+	uint32_t i, j, k;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+
+	for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) {
+		for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) {
+			for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) {
+				/* no need to select if instance number is 1 */
+				if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 ||
+				    gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1)
+					gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+				amdgpu_ras_inst_reset_ras_error_count(adev,
+					&(gfx_v9_4_3_ce_reg_list[i].reg_entry),
+					1,
+					GET_INST(GC, xcc_id));
+
+				amdgpu_ras_inst_reset_ras_error_count(adev,
+					&(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+					1,
+					GET_INST(GC, xcc_id));
+			}
+		}
+	}
+
+	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+			xcc_id);
+	mutex_unlock(&adev->grbm_idx_mutex);
+}
+
 static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev,
 					int xcc_id)
 {
@@ -3882,6 +3915,11 @@ static void gfx_v9_4_3_query_ras_error_count(struct amdgpu_device *adev,
 			gfx_v9_4_3_inst_query_ras_err_count);
 }
 
+static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev)
+{	/* NULL is presumably forwarded as ras_error_status, unused by the callback */
+	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count);
+}
+
 static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev)
 {
 	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status);
author	Tao Zhou <tao.zhou1@amd.com>	2023-02-08 14:54:01 +0800
committer	Alex Deucher <alexander.deucher@amd.com>	2023-06-09 10:37:44 -0400
commit	30feef0676092bdb4b8697e68b8d5864d54f096f (patch)
tree	7c0e3e05a3b489aa54782e114aa1790b6044078f /drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
parent	bfa84da6185cb1897fcee0ac3815625d162d39f0 (diff)
download	linux-30feef0676092bdb4b8697e68b8d5864d54f096f.tar.gz linux-30feef0676092bdb4b8697e68b8d5864d54f096f.tar.bz2 linux-30feef0676092bdb4b8697e68b8d5864d54f096f.zip