diff options
author | Tao Zhou <tao.zhou1@amd.com> | 2024-05-23 17:58:47 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-06-14 16:18:26 -0400 |
commit | 5f7697bbc1a41d4799797204137be85121063f65 (patch) | |
tree | 100a17bb814d5622c72bd96682d0350f4debae9e /drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | |
parent | 5afbbcfe3b6cd00fa52e0375e1e276748929bc50 (diff) | |
download | linux-5f7697bbc1a41d4799797204137be85121063f65.tar.gz linux-5f7697bbc1a41d4799797204137be85121063f65.tar.bz2 linux-5f7697bbc1a41d4799797204137be85121063f65.zip |
drm/amdgpu: trigger mode1 reset for RAS RMA status
Check RMA status in bad page retirement flow.
v2: fix coding bugs in v1.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 540e0f066b26..20e0e522fb51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -195,7 +195,8 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); amdgpu_umc_handle_bad_pages(adev, ras_error_status); - if (err_data->ue_count && reset) { + if ((err_data->ue_count || err_data->de_count) && + (reset || (con && con->is_rma))) { con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); } @@ -211,6 +212,7 @@ int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, .block = AMDGPU_RAS_BLOCK__UMC, }; struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); uint32_t timeout = timeout_ms; memset(&err_data, 0, sizeof(err_data)); @@ -243,9 +245,7 @@ int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (reset) { - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - + if (reset || (err_data.err_addr_cnt && con && con->is_rma)) { con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); } |