summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c75
1 files changed, 46 insertions, 29 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1bbd39b3b0fc..8516c814bc9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
return r;
++(num_ibs[r]);
+ p->gang_leader_idx = r;
return 0;
}
@@ -287,20 +288,18 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
}
}
- if (!p->gang_size)
- return -EINVAL;
+ if (!p->gang_size) {
+ ret = -EINVAL;
+ goto free_partial_kdata;
+ }
for (i = 0; i < p->gang_size; ++i) {
- ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm);
- if (ret)
- goto free_all_kdata;
-
- ret = drm_sched_job_init(&p->jobs[i]->base, p->entities[i],
- &fpriv->vm);
+ ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
+ num_ibs[i], &p->jobs[i]);
if (ret)
goto free_all_kdata;
}
- p->gang_leader = p->jobs[p->gang_size - 1];
+ p->gang_leader = p->jobs[p->gang_leader_idx];
if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
ret = -ECANCELED;
@@ -430,7 +429,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}
- r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
+ r = amdgpu_sync_fence(&p->sync, fence);
dma_fence_put(fence);
if (r)
return r;
@@ -452,9 +451,20 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
return r;
}
- r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
- dma_fence_put(fence);
+ r = amdgpu_sync_fence(&p->sync, fence);
+ if (r)
+ goto error;
+
+ /*
+ * When we have an explicit dependency it might be necessary to insert a
+ * pipeline sync to make sure that all caches etc are flushed and the
+ * next job actually sees the results from the previous one.
+ */
+ if (fence->context == p->gang_leader->base.entity->fence_context)
+ r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
+error:
+ dma_fence_put(fence);
return r;
}
@@ -910,7 +920,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto out_free_user_pages;
}
- r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
+ r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
if (r) {
kvfree(e->user_pages);
e->user_pages = NULL;
@@ -988,10 +998,12 @@ out_free_user_pages:
if (!e->user_pages)
continue;
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
kvfree(e->user_pages);
e->user_pages = NULL;
+ e->range = NULL;
}
+ mutex_unlock(&p->bo_list->bo_list_mutex);
return r;
}
@@ -1101,7 +1113,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&job->sync, fpriv->prt_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
if (r)
return r;
@@ -1112,7 +1124,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
if (r)
return r;
}
@@ -1131,7 +1143,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
if (r)
return r;
}
@@ -1144,7 +1156,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&job->sync, vm->last_update);
+ r = amdgpu_sync_fence(&p->sync, vm->last_update);
if (r)
return r;
@@ -1176,7 +1188,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_job *leader = p->gang_leader;
struct amdgpu_bo_list_entry *e;
unsigned int i;
int r;
@@ -1188,22 +1199,21 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
sync_mode = amdgpu_bo_explicit_sync(bo) ?
AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
- r = amdgpu_sync_resv(p->adev, &leader->sync, resv, sync_mode,
+ r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
&fpriv->vm);
if (r)
return r;
}
- for (i = 0; i < p->gang_size - 1; ++i) {
- r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
+ for (i = 0; i < p->gang_size; ++i) {
+ r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
if (r)
return r;
}
- r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
+ r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
if (r && r != -ERESTARTSYS)
DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
-
return r;
}
@@ -1237,11 +1247,14 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
for (i = 0; i < p->gang_size; ++i)
drm_sched_job_arm(&p->jobs[i]->base);
- for (i = 0; i < (p->gang_size - 1); ++i) {
+ for (i = 0; i < p->gang_size; ++i) {
struct dma_fence *fence;
+ if (p->jobs[i] == leader)
+ continue;
+
fence = &p->jobs[i]->base.s_fence->scheduled;
- r = amdgpu_sync_fence(&leader->sync, fence);
+ r = drm_sched_job_add_dependency(&leader->base, fence);
if (r)
goto error_cleanup;
}
@@ -1264,7 +1277,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
+ e->range = NULL;
}
if (r) {
r = -EAGAIN;
@@ -1275,7 +1289,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
list_for_each_entry(e, &p->validated, tv.head) {
/* Everybody except for the gang leader uses READ */
- for (i = 0; i < (p->gang_size - 1); ++i) {
+ for (i = 0; i < p->gang_size; ++i) {
+ if (p->jobs[i] == leader)
+ continue;
+
dma_resv_add_fence(e->tv.bo->base.resv,
&p->jobs[i]->base.s_fence->finished,
DMA_RESV_USAGE_READ);
@@ -1285,7 +1302,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
e->tv.num_shared = 0;
}
- seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
+ seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
p->fence);
amdgpu_cs_post_dependencies(p);