summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-09-19 16:24:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-09-19 16:24:24 -0700
commit574cc4539762561d96b456dbc0544d8898bd4c6e (patch)
tree07d84db8cf9fd30cbde6f539ce3a3f6116593e41 /drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
parent3c2edc36a77420d8be05d656019dbc8c31535992 (diff)
parent945b584c94f8c665b2df3834a8a6a8faf256cd5f (diff)
downloadlinux-574cc4539762561d96b456dbc0544d8898bd4c6e.tar.gz
linux-574cc4539762561d96b456dbc0544d8898bd4c6e.tar.bz2
linux-574cc4539762561d96b456dbc0544d8898bd4c6e.zip
Merge tag 'drm-next-2019-09-18' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "This is the main pull request for 5.4-rc1 merge window. I don't think there is anything outstanding so next week should just be fixes, but we'll see if I missed anything. I landed some fixes earlier in the week but got delayed writing summary and sending it out, due to a mix of sick kid and jetlag! There are some fixes pending, but I'd rather get the main merge out of the way instead of delaying it longer. It's also pretty large in commit count and new amd header file size. The largest thing is four new amdgpu products (navi12/14, arcturus and renoir APU support). Otherwise it's pretty much lots of work across the board, i915 has started landing tigerlake support, lots of icelake fixes and lots of locking reworking for future gpu support, lots of header file rework (drmP.h is nearly gone), some old legacy hacks (DRM_WAIT_ON) have been put into the places they are needed. uapi: - content protection type property for HDCP core: - rework include dependencies - lots of drmP.h removals - link rate calculation robustness fix - make fb helper map only when required - add connector->DDC adapter link - DRM_WAIT_ON removed - drop DRM_AUTH usage from drivers dma-buf: - reservation object fence helper dma-fence: - shrink dma_fence struct - merge signal functions - store timestamps in dma_fence - selftests ttm: - embed drm_get_object struct into ttm_buffer_object - release_notify callback bridges: - sii902x - audio graph card support - tc358767 - aux data handling rework - ti-snd64dsi86 - debugfs support, DSI mode flags support panels: - Support for GiantPlus GPM940B0, Sharp LQ070Y3DG3B, Ortustech COM37H3M, Novatek NT39016, Sharp LS020B1DD01D, Raydium RM67191, Boe Himax8279d, Sharp LD-D5116Z01B - TI nspire, NEC NL8048HL11, LG Philips LB035Q02, Sharp LS037V7DW01, Sony ACX565AKM, Toppoly TD028TTEC1 Toppoly TD043MTEA1 i915: - Initial tigerlake platform support - Locking simplification work, general all over refactoring. - Selftests - HDCP debug info improvements - DSI properties - Icelake display PLL fixes, colorspace fixes, bandwidth fixes, DSI suspend/resume - GuC fixes - Perf fixes - ElkhartLake enablement - DP MST fixes - GVT - command parser enhancements amdgpu: - add wipe memory on release flag for buffer creation - Navi12/14 support (may be marked experimental) - Arcturus support - Renoir APU support - mclk DPM for Navi - DC display fixes - Raven scatter/gather support - RAS support for GFX - Navi12 + Arcturus power features - GPU reset for Picasso - smu11 i2c controller support amdkfd: - navi12/14 support - Arcturus support radeon: - kexec fix nouveau: - improved display color management - detect lack of GPU power cables vmwgfx: - evicition priority support - remove unused security feature msm: - msm8998 display support - better async commit support for cursor updates etnaviv: - per-process address space support - performance counter fixes - softpin support mcde: - DCS transfers fix exynos: - drmP.h cleanup lima: - reduce logging kirin: - misc clenaups komeda: - dual-link support - DT memory regions hisilicon: - misc fixes imx: - IPUv3 image converter fixes - 32-bit RGB V4L2 pixel format support ingenic: - more support for panel related cases mgag200: - cursor support fix panfrost: - export GPU features register to userspace - gpu heap allocations - per-fd address space support pl111: - CLD pads wiring support removed from DT rockchip: - rework to use DRM PSR helpers - fix bug in VOP_WIN_GET macro - DSI DT binding rework sun4i: - improve support for color encoding and range - DDC enabled GPIO tinydrm: - rework SPI support - improve MIPI-DBI support - moved to drm/tiny vkms: - rework CRC tracking dw-hdmi: - get_eld and i2s improvements gm12u320: - misc fixes meson: - global code cleanup - vpu feature detect omap: - alpha/pixel blend mode properties rcar-du: - misc fixes" * tag 'drm-next-2019-09-18' of git://anongit.freedesktop.org/drm/drm: (2112 commits) drm/nouveau/bar/gm20b: Avoid BAR1 teardown during init drm/nouveau: Fix ordering between TTM and GEM release drm/nouveau/prime: Extend DMA reservation object lock drm/nouveau: Fix fallout from reservation object rework drm/nouveau/kms/nv50-: Don't create MSTMs for eDP connectors drm/i915: Use NOEVICT for first pass on attemping to pin a GGTT mmap drm/i915: to make vgpu ppgtt notificaiton as atomic operation drm/i915: Flush the existing fence before GGTT read/write drm/i915: Hold irq-off for the entire fake lock period drm/i915/gvt: update RING_START reg of vGPU when the context is submitted to i915 drm/i915/gvt: update vgpu workload head pointer correctly drm/mcde: Fix DSI transfers drm/msm: Use the correct dma_sync calls harder drm/msm: remove unlikely() from WARN_ON() conditions drm/msm/dsi: Fix return value check for clk_get_parent drm/msm: add atomic traces drm/msm/dpu: async commit support drm/msm: async commit support drm/msm: split power control from prepare/complete_commit drm/msm: add kms->flush_commit() ...
Diffstat (limited to 'drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c343
1 files changed, 201 insertions, 142 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 41dab9ea33cd..b5f6937369ea 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -5,7 +5,7 @@
*/
#include <linux/intel-iommu.h>
-#include <linux/reservation.h>
+#include <linux/dma-resv.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>
@@ -16,13 +16,15 @@
#include "gem/i915_gem_ioctls.h"
#include "gt/intel_context.h"
+#include "gt/intel_engine_pool.h"
+#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
-#include "i915_gem_ioctls.h"
+#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
+#include "i915_gem_ioctls.h"
#include "i915_trace.h"
-#include "intel_drv.h"
enum {
FORCE_CPU_RELOC = 1,
@@ -222,7 +224,6 @@ struct i915_execbuffer {
struct intel_engine_cs *engine; /** engine to queue the request to */
struct intel_context *context; /* logical state for the request */
struct i915_gem_context *gem_context; /** caller's context */
- struct i915_address_space *vm; /** GTT and vma for the request */
struct i915_request *request; /** our request to build */
struct i915_vma *batch; /** identity of the batch obj/vma */
@@ -696,7 +697,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
case 1:
/* Too fragmented, unbind everything and retry */
- err = i915_gem_evict_vm(eb->vm);
+ err = i915_gem_evict_vm(eb->context->vm);
if (err)
return err;
break;
@@ -724,12 +725,8 @@ static int eb_select_context(struct i915_execbuffer *eb)
return -ENOENT;
eb->gem_context = ctx;
- if (ctx->vm) {
- eb->vm = ctx->vm;
+ if (ctx->vm)
eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
- } else {
- eb->vm = &eb->i915->ggtt.vm;
- }
eb->context_flags = 0;
if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
@@ -738,63 +735,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
return 0;
}
-static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
-{
- struct i915_request *rq;
-
- /*
- * Completely unscientific finger-in-the-air estimates for suitable
- * maximum user request size (to avoid blocking) and then backoff.
- */
- if (intel_ring_update_space(ring) >= PAGE_SIZE)
- return NULL;
-
- /*
- * Find a request that after waiting upon, there will be at least half
- * the ring available. The hysteresis allows us to compete for the
- * shared ring and should mean that we sleep less often prior to
- * claiming our resources, but not so long that the ring completely
- * drains before we can submit our next request.
- */
- list_for_each_entry(rq, &ring->request_list, ring_link) {
- if (__intel_ring_space(rq->postfix,
- ring->emit, ring->size) > ring->size / 2)
- break;
- }
- if (&rq->ring_link == &ring->request_list)
- return NULL; /* weird, we will check again later for real */
-
- return i915_request_get(rq);
-}
-
-static int eb_wait_for_ring(const struct i915_execbuffer *eb)
-{
- struct i915_request *rq;
- int ret = 0;
-
- /*
- * Apply a light amount of backpressure to prevent excessive hogs
- * from blocking waiting for space whilst holding struct_mutex and
- * keeping all of their resources pinned.
- */
-
- rq = __eb_wait_for_ring(eb->context->ring);
- if (rq) {
- mutex_unlock(&eb->i915->drm.struct_mutex);
-
- if (i915_request_wait(rq,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT) < 0)
- ret = -EINTR;
-
- i915_request_put(rq);
-
- mutex_lock(&eb->i915->drm.struct_mutex);
- }
-
- return ret;
-}
-
static int eb_lookup_vmas(struct i915_execbuffer *eb)
{
struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
@@ -831,7 +771,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
goto err_vma;
}
- vma = i915_vma_instance(obj, eb->vm, NULL);
+ vma = i915_vma_instance(obj, eb->context->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
@@ -994,7 +934,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
__i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size);
i915_gem_object_unpin_map(cache->rq->batch->obj);
- i915_gem_chipset_flush(cache->rq->i915);
+ intel_gt_chipset_flush(cache->rq->engine->gt);
i915_request_add(cache->rq);
cache->rq = NULL;
@@ -1018,11 +958,12 @@ static void reloc_cache_reset(struct reloc_cache *cache)
kunmap_atomic(vaddr);
i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
} else {
- wmb();
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
io_mapping_unmap_atomic((void __iomem *)vaddr);
- if (cache->node.allocated) {
- struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+ if (cache->node.allocated) {
ggtt->vm.clear_range(&ggtt->vm,
cache->node.start,
cache->node.size);
@@ -1077,11 +1018,15 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
void *vaddr;
if (cache->vaddr) {
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
} else {
struct i915_vma *vma;
int err;
+ if (i915_gem_object_is_tiled(obj))
+ return ERR_PTR(-EINVAL);
+
if (use_cpu_reloc(cache, obj))
return NULL;
@@ -1093,8 +1038,8 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
PIN_MAPPABLE |
- PIN_NONBLOCK |
- PIN_NONFAULT);
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
if (IS_ERR(vma)) {
memset(&cache->node, 0, sizeof(cache->node));
err = drm_mm_insert_node_in_range
@@ -1105,12 +1050,6 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
if (err) /* no inactive aperture space, use cpu reloc */
return NULL;
} else {
- err = i915_vma_put_fence(vma);
- if (err) {
- i915_vma_unpin(vma);
- return ERR_PTR(err);
- }
-
cache->node.start = vma->node.start;
cache->node.mm = (void *)vma;
}
@@ -1118,7 +1057,6 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
offset = cache->node.start;
if (cache->node.allocated) {
- wmb();
ggtt->vm.insert_page(&ggtt->vm,
i915_gem_object_get_dma_address(obj, page),
offset, I915_CACHE_NONE, 0);
@@ -1201,25 +1139,26 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
- struct drm_i915_gem_object *obj;
+ struct intel_engine_pool_node *pool;
struct i915_request *rq;
struct i915_vma *batch;
u32 *cmd;
int err;
- obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
+ pool = intel_engine_pool_get(&eb->engine->pool, PAGE_SIZE);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
- cmd = i915_gem_object_pin_map(obj,
+ cmd = i915_gem_object_pin_map(pool->obj,
cache->has_llc ?
I915_MAP_FORCE_WB :
I915_MAP_FORCE_WC);
- i915_gem_object_unpin_pages(obj);
- if (IS_ERR(cmd))
- return PTR_ERR(cmd);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto out_pool;
+ }
- batch = i915_vma_instance(obj, vma->vm, NULL);
+ batch = i915_vma_instance(pool->obj, vma->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto err_unmap;
@@ -1235,6 +1174,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto err_unpin;
}
+ err = intel_engine_pool_mark_active(pool, rq);
+ if (err)
+ goto err_request;
+
err = reloc_move_to_gpu(rq, vma);
if (err)
goto err_request;
@@ -1246,8 +1189,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto skip_request;
i915_vma_lock(batch);
- GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
- err = i915_vma_move_to_active(batch, rq, 0);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
i915_vma_unlock(batch);
if (err)
goto skip_request;
@@ -1260,7 +1204,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
cache->rq_size = 0;
/* Return with batch mapping (cmd) still pinned */
- return 0;
+ goto out_pool;
skip_request:
i915_request_skip(rq, err);
@@ -1269,7 +1213,9 @@ err_request:
err_unpin:
i915_vma_unpin(batch);
err_unmap:
- i915_gem_object_unpin_map(obj);
+ i915_gem_object_unpin_map(pool->obj);
+out_pool:
+ intel_engine_pool_put(pool);
return err;
}
@@ -1317,7 +1263,7 @@ relocate_entry(struct i915_vma *vma,
if (!eb->reloc_cache.vaddr &&
(DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
- !reservation_object_test_signaled_rcu(vma->resv, true))) {
+ !dma_resv_test_signaled_rcu(vma->resv, true))) {
const unsigned int gen = eb->reloc_cache.gen;
unsigned int len;
u32 *batch;
@@ -1952,7 +1898,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
eb->exec = NULL;
/* Unconditionally flush any chipset caches (for streaming writes). */
- i915_gem_chipset_flush(eb->i915);
+ intel_gt_chipset_flush(eb->engine->gt);
return 0;
err_skip:
@@ -2011,18 +1957,17 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
{
- struct drm_i915_gem_object *shadow_batch_obj;
+ struct intel_engine_pool_node *pool;
struct i915_vma *vma;
int err;
- shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
- PAGE_ALIGN(eb->batch_len));
- if (IS_ERR(shadow_batch_obj))
- return ERR_CAST(shadow_batch_obj);
+ pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len);
+ if (IS_ERR(pool))
+ return ERR_CAST(pool);
err = intel_engine_cmd_parser(eb->engine,
eb->batch->obj,
- shadow_batch_obj,
+ pool->obj,
eb->batch_start_offset,
eb->batch_len,
is_master);
@@ -2031,12 +1976,12 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
vma = NULL;
else
vma = ERR_PTR(err);
- goto out;
+ goto err;
}
- vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
+ vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0);
if (IS_ERR(vma))
- goto out;
+ goto err;
eb->vma[eb->buffer_count] = i915_vma_get(vma);
eb->flags[eb->buffer_count] =
@@ -2044,16 +1989,24 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
vma->exec_flags = &eb->flags[eb->buffer_count];
eb->buffer_count++;
-out:
- i915_gem_object_unpin_pages(shadow_batch_obj);
+ vma->private = pool;
+ return vma;
+
+err:
+ intel_engine_pool_put(pool);
return vma;
}
static void
add_to_client(struct i915_request *rq, struct drm_file *file)
{
- rq->file_priv = file->driver_priv;
- list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+
+ rq->file_priv = file_priv;
+
+ spin_lock(&file_priv->mm.lock);
+ list_add_tail(&rq->client_link, &file_priv->mm.request_list);
+ spin_unlock(&file_priv->mm.lock);
}
static int eb_submit(struct i915_execbuffer *eb)
@@ -2093,6 +2046,12 @@ static int eb_submit(struct i915_execbuffer *eb)
return 0;
}
+static int num_vcs_engines(const struct drm_i915_private *i915)
+{
+ return hweight64(INTEL_INFO(i915)->engine_mask &
+ GENMASK_ULL(VCS0 + I915_MAX_VCS - 1, VCS0));
+}
+
/*
* Find one BSD ring to dispatch the corresponding BSD command.
* The engine index is returned.
@@ -2105,8 +2064,8 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
/* Check whether the file_priv has already selected one ring. */
if ((int)file_priv->bsd_engine < 0)
- file_priv->bsd_engine = atomic_fetch_xor(1,
- &dev_priv->mm.bsd_engine_dispatch_index);
+ file_priv->bsd_engine =
+ get_random_int() % num_vcs_engines(dev_priv);
return file_priv->bsd_engine;
}
@@ -2119,15 +2078,80 @@ static const enum intel_engine_id user_ring_map[] = {
[I915_EXEC_VEBOX] = VECS0
};
-static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_throttle(struct intel_context *ce)
+{
+ struct intel_ring *ring = ce->ring;
+ struct intel_timeline *tl = ce->timeline;
+ struct i915_request *rq;
+
+ /*
+ * Completely unscientific finger-in-the-air estimates for suitable
+ * maximum user request size (to avoid blocking) and then backoff.
+ */
+ if (intel_ring_update_space(ring) >= PAGE_SIZE)
+ return NULL;
+
+ /*
+ * Find a request that after waiting upon, there will be at least half
+ * the ring available. The hysteresis allows us to compete for the
+ * shared ring and should mean that we sleep less often prior to
+ * claiming our resources, but not so long that the ring completely
+ * drains before we can submit our next request.
+ */
+ list_for_each_entry(rq, &tl->requests, link) {
+ if (rq->ring != ring)
+ continue;
+
+ if (__intel_ring_space(rq->postfix,
+ ring->emit, ring->size) > ring->size / 2)
+ break;
+ }
+ if (&rq->link == &tl->requests)
+ return NULL; /* weird, we will check again later for real */
+
+ return i915_request_get(rq);
+}
+
+static int
+__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
{
int err;
+ if (likely(atomic_inc_not_zero(&ce->pin_count)))
+ return 0;
+
+ err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
+ if (err)
+ return err;
+
+ err = __intel_context_do_pin(ce);
+ mutex_unlock(&eb->i915->drm.struct_mutex);
+
+ return err;
+}
+
+static void
+__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+ if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+ return;
+
+ mutex_lock(&eb->i915->drm.struct_mutex);
+ intel_context_unpin(ce);
+ mutex_unlock(&eb->i915->drm.struct_mutex);
+}
+
+static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+ struct intel_timeline *tl;
+ struct i915_request *rq;
+ int err;
+
/*
* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
* EIO if the GPU is already wedged.
*/
- err = i915_terminally_wedged(eb->i915);
+ err = intel_gt_terminally_wedged(ce->engine->gt);
if (err)
return err;
@@ -2136,18 +2160,64 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
* GGTT space, so do this first before we reserve a seqno for
* ourselves.
*/
- err = intel_context_pin(ce);
+ err = __eb_pin_context(eb, ce);
if (err)
return err;
+ /*
+ * Take a local wakeref for preparing to dispatch the execbuf as
+ * we expect to access the hardware fairly frequently in the
+ * process, and require the engine to be kept awake between accesses.
+ * Upon dispatch, we acquire another prolonged wakeref that we hold
+ * until the timeline is idle, which in turn releases the wakeref
+ * taken on the engine, and the parent device.
+ */
+ tl = intel_context_timeline_lock(ce);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
+ goto err_unpin;
+ }
+
+ intel_context_enter(ce);
+ rq = eb_throttle(ce);
+
+ intel_context_timeline_unlock(tl);
+
+ if (rq) {
+ if (i915_request_wait(rq,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT) < 0) {
+ i915_request_put(rq);
+ err = -EINTR;
+ goto err_exit;
+ }
+
+ i915_request_put(rq);
+ }
+
eb->engine = ce->engine;
eb->context = ce;
return 0;
+
+err_exit:
+ mutex_lock(&tl->mutex);
+ intel_context_exit(ce);
+ intel_context_timeline_unlock(tl);
+err_unpin:
+ __eb_unpin_context(eb, ce);
+ return err;
}
-static void eb_unpin_context(struct i915_execbuffer *eb)
+static void eb_unpin_engine(struct i915_execbuffer *eb)
{
- intel_context_unpin(eb->context);
+ struct intel_context *ce = eb->context;
+ struct intel_timeline *tl = ce->timeline;
+
+ mutex_lock(&tl->mutex);
+ intel_context_exit(ce);
+ mutex_unlock(&tl->mutex);
+
+ __eb_unpin_context(eb, ce);
}
static unsigned int
@@ -2165,7 +2235,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
return -1;
}
- if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) {
+ if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) {
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
@@ -2192,9 +2262,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
}
static int
-eb_select_engine(struct i915_execbuffer *eb,
- struct drm_file *file,
- struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb,
+ struct drm_file *file,
+ struct drm_i915_gem_execbuffer2 *args)
{
struct intel_context *ce;
unsigned int idx;
@@ -2209,7 +2279,7 @@ eb_select_engine(struct i915_execbuffer *eb,
if (IS_ERR(ce))
return PTR_ERR(ce);
- err = eb_pin_context(eb, ce);
+ err = __eb_pin_engine(eb, ce);
intel_context_put(ce);
return err;
@@ -2427,25 +2497,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_destroy;
- /*
- * Take a local wakeref for preparing to dispatch the execbuf as
- * we expect to access the hardware fairly frequently in the
- * process. Upon first dispatch, we acquire another prolonged
- * wakeref that we hold until the GPU has been idle for at least
- * 100ms.
- */
- intel_gt_pm_get(eb.i915);
+ err = eb_pin_engine(&eb, file, args);
+ if (unlikely(err))
+ goto err_context;
err = i915_mutex_lock_interruptible(dev);
if (err)
- goto err_rpm;
-
- err = eb_select_engine(&eb, file, args);
- if (unlikely(err))
- goto err_unlock;
-
- err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
- if (unlikely(err))
goto err_engine;
err = eb_relocate(&eb);
@@ -2572,6 +2629,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
* to explicitly hold another reference here.
*/
eb.request->batch = eb.batch;
+ if (eb.batch->private)
+ intel_engine_pool_mark_active(eb.batch->private, eb.request);
trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb);
@@ -2596,15 +2655,15 @@ err_request:
err_batch_unpin:
if (eb.batch_flags & I915_DISPATCH_SECURE)
i915_vma_unpin(eb.batch);
+ if (eb.batch->private)
+ intel_engine_pool_put(eb.batch->private);
err_vma:
if (eb.exec)
eb_release_vmas(&eb);
-err_engine:
- eb_unpin_context(&eb);
-err_unlock:
mutex_unlock(&dev->struct_mutex);
-err_rpm:
- intel_gt_pm_put(eb.i915);
+err_engine:
+ eb_unpin_engine(&eb);
+err_context:
i915_gem_context_put(eb.gem_context);
err_destroy:
eb_destroy(&eb);