From b2fe31cf648156331991333c1d87346321cab056 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 15 Sep 2021 09:08:28 +0800 Subject: drm/amdgpu: Put drm_dev_enter/exit outside hot codepath We hit soft hang while doing memory pressure test on one numa system. After a qucik look, this is because kfd invalid/valid userptr memory frequently with process_info lock hold. Looks like update page table mapping use too much cpu time. perf top says below, 75.81% [kernel] [k] __srcu_read_unlock 6.19% [amdgpu] [k] amdgpu_gmc_set_pte_pde 3.56% [kernel] [k] __srcu_read_lock 2.20% [amdgpu] [k] amdgpu_vm_cpu_update 2.20% [kernel] [k] __sg_page_iter_dma_next 2.15% [drm] [k] drm_dev_enter 1.70% [drm] [k] drm_prime_sg_to_dma_addr_array 1.18% [kernel] [k] __sg_alloc_table_from_pages 1.09% [drm] [k] drm_dev_exit So move drm_dev_enter/exit outside gmc code, instead let caller do it. They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and gmc_init_pdb0. vm_bo_update_mapping already calls it. Signed-off-by: xinhui pan Reviewed-and-tested-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6b15cad78de9..a1ddf74bbdba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_bo *bo = &vmbo->bo; unsigned entries, ats_entries; uint64_t addr; - int r; + int r, idx; /* Figure out our place in the hierarchy */ if (ancestor->parent) { @@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, return r; } + if (!drm_dev_enter(&adev->ddev, &idx)) + return -ENODEV; + r = vm->update_funcs->map_table(vmbo); if (r) - return r; + goto exit; memset(¶ms, 0, sizeof(params)); params.adev = adev; @@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); if (r) - return r; + goto exit; addr = 0; if (ats_entries) { @@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries, value, flags); if (r) - return r; + goto exit; addr += ats_entries * 8; } @@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, value, flags); if (r) - return r; + goto exit; } - return vm->update_funcs->commit(¶ms, NULL); + r = vm->update_funcs->commit(¶ms, NULL); +exit: + drm_dev_exit(idx); + return r; } /** @@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate) { struct amdgpu_vm_update_params params; - int r; + int r, idx; if (list_empty(&vm->relocated)) return 0; + if (!drm_dev_enter(&adev->ddev, &idx)) + return -ENODEV; + memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; @@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); if (r) - return r; + goto exit; while (!list_empty(&vm->relocated)) { struct amdgpu_vm_bo_base *entry; @@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, r = vm->update_funcs->commit(¶ms, &vm->last_update); if (r) goto error; + drm_dev_exit(idx); return 0; error: amdgpu_vm_invalidate_pds(adev, vm); +exit: + drm_dev_exit(idx); return r; } -- cgit v1.2.3