diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 |
2 files changed, 20 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7838b5f9ad19..aff9d70347ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -88,6 +88,8 @@ MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 +#define AMDGPU_MAX_RETRY_LIMIT 2 +#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) const char *amdgpu_asic_name[] = { "TAHITI", @@ -4366,7 +4368,9 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, { int r; struct amdgpu_hive_info *hive = NULL; + int retry_limit = 0; +retry: amdgpu_amdkfd_pre_reset(adev); amdgpu_amdkfd_pre_reset(adev); @@ -4415,6 +4419,14 @@ error: } amdgpu_virt_release_full_gpu(adev, true); + if (AMDGPU_RETRY_SRIOV_RESET(r)) { + if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) { + retry_limit++; + goto retry; + } else + DRM_ERROR("GPU reset retry is beyond the retry limit\n"); + } + return r; } @@ -5206,6 +5218,9 @@ skip_hw_reset: drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); } + if (tmp_adev->asic_reset_res) + r = tmp_adev->asic_reset_res; + tmp_adev->asic_reset_res = 0; if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index bfc47bea23db..4870e093213d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -37,6 +37,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) struct amdgpu_task_info ti; struct amdgpu_device *adev = ring->adev; int idx; + int r; if (!drm_dev_enter(adev_to_drm(adev), &idx)) { DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s", @@ -63,7 +64,10 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) ti.process_name, ti.tgid, ti.task_name, ti.pid); if (amdgpu_device_should_recover_gpu(ring->adev)) { - amdgpu_device_gpu_recover(ring->adev, job); + r = amdgpu_device_gpu_recover(ring->adev, job); + if (r) + DRM_ERROR("GPU Recovery Failed: %d\n", r); + } else { drm_sched_suspend_timeout(&ring->sched); if (amdgpu_sriov_vf(adev)) |