summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gem
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2021-10-22 06:30:33 +1000
committerDave Airlie <airlied@redhat.com>2021-10-22 06:30:34 +1000
commit6f2f7c83303d2227f47551423e507d77d9ea01c7 (patch)
tree05dff35a0b96494a2419ff5747f80c4648c10cc4 /drivers/gpu/drm/i915/gem
parent94ff371eb8497d02b8cb9d0c2c7370193c98e9f7 (diff)
parentab5d964c001b9efffcbfa4d67a30186b67d79771 (diff)
downloadlinux-6f2f7c83303d2227f47551423e507d77d9ea01c7.tar.gz
linux-6f2f7c83303d2227f47551423e507d77d9ea01c7.tar.bz2
linux-6f2f7c83303d2227f47551423e507d77d9ea01c7.zip
Merge tag 'drm-intel-gt-next-2021-10-21' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes: - Expose multi-LRC submission interface Similar to the bonded submission interface but simplified. Comes with GuC only implementation for now. See kerneldoc for more details. Userspace changes: https://github.com/intel/media-driver/pull/1252 - Expose logical engine instance to user Needed by the multi-LRC submission interface for GuC Userspace changes: https://github.com/intel/media-driver/pull/1252 Driver Changes: - Fix blank screen booting crashes when CONFIG_CC_OPTIMIZE_FOR_SIZE=y (Hugh) - Add support for multi-LRC submission in the GuC backend (Matt B) - Add extra cache flushing before making pages userspace visible (Matt A, Thomas) - Mark internal GPU object pages dirty so they will be flushed properly (Matt A) - Move remaining debugfs interfaces i915_wedged/i915_forcewake_user into gt (Andi) - Replace the unconditional clflushes with drm_clflush_virt_range() (Ville) - Remove IS_ACTIVE macro completely (Lucas) - Improve kerneldocs for cache_dirty (Matt A) - Add missing includes (Lucas) - Selftest improvements (Matt R, Ran, Matt A) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/YXFmLKoq8Fg9JxSd@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_busy.c57
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c227
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context_types.h16
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c9
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c797
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_internal.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c26
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h27
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c29
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c8
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c7
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c29
13 files changed, 947 insertions, 288 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 6234e17259c1..7358bebef15c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -4,6 +4,8 @@
* Copyright © 2014-2016 Intel Corporation
*/
+#include <linux/dma-fence-array.h>
+
#include "gt/intel_engine.h"
#include "i915_gem_ioctls.h"
@@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id)
}
static __always_inline unsigned int
-__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
+__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id))
{
const struct i915_request *rq;
@@ -46,29 +48,60 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
* to eventually flush us, but to minimise latency just ask the
* hardware.
*
- * Note we only report on the status of native fences.
+ * Note we only report on the status of native fences and we currently
+ * have two native fences:
+ *
+ * 1. A composite fence (dma_fence_array) constructed of i915 requests
+ * created during a parallel submission. In this case we deconstruct the
+ * composite fence into individual i915 requests and check the status of
+ * each request.
+ *
+ * 2. A single i915 request.
*/
- if (!dma_fence_is_i915(fence))
+ if (dma_fence_is_array(fence)) {
+ struct dma_fence_array *array = to_dma_fence_array(fence);
+ struct dma_fence **child = array->fences;
+ unsigned int nchild = array->num_fences;
+
+ do {
+ struct dma_fence *current_fence = *child++;
+
+ /* Not an i915 fence, can't be busy per above */
+ if (!dma_fence_is_i915(current_fence) ||
+ !test_bit(I915_FENCE_FLAG_COMPOSITE,
+ &current_fence->flags)) {
+ return 0;
+ }
+
+ rq = to_request(current_fence);
+ if (!i915_request_completed(rq))
+ return flag(rq->engine->uabi_class);
+ } while (--nchild);
+
+ /* All requests in array complete, not busy */
return 0;
+ } else {
+ if (!dma_fence_is_i915(fence))
+ return 0;
- /* opencode to_request() in order to avoid const warnings */
- rq = container_of(fence, const struct i915_request, fence);
- if (i915_request_completed(rq))
- return 0;
+ rq = to_request(fence);
+ if (i915_request_completed(rq))
+ return 0;
- /* Beware type-expansion follies! */
- BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
- return flag(rq->engine->uabi_class);
+ /* Beware type-expansion follies! */
+ BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
+ return flag(rq->engine->uabi_class);
+ }
}
static __always_inline unsigned int
-busy_check_reader(const struct dma_fence *fence)
+busy_check_reader(struct dma_fence *fence)
{
return __busy_set_if_active(fence, __busy_read_flag);
}
static __always_inline unsigned int
-busy_check_writer(const struct dma_fence *fence)
+busy_check_writer(struct dma_fence *fence)
{
if (!fence)
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index d225d3dd0b40..fb33d0322960 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -556,9 +556,147 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data)
return 0;
}
+static int
+set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
+ void *data)
+{
+ struct i915_context_engines_parallel_submit __user *ext =
+ container_of_user(base, typeof(*ext), base);
+ const struct set_proto_ctx_engines *set = data;
+ struct drm_i915_private *i915 = set->i915;
+ u64 flags;
+ int err = 0, n, i, j;
+ u16 slot, width, num_siblings;
+ struct intel_engine_cs **siblings = NULL;
+ intel_engine_mask_t prev_mask;
+
+ /* FIXME: This is NIY for execlists */
+ if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
+ return -ENODEV;
+
+ if (get_user(slot, &ext->engine_index))
+ return -EFAULT;
+
+ if (get_user(width, &ext->width))
+ return -EFAULT;
+
+ if (get_user(num_siblings, &ext->num_siblings))
+ return -EFAULT;
+
+ if (slot >= set->num_engines) {
+ drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
+ slot, set->num_engines);
+ return -EINVAL;
+ }
+
+ if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) {
+ drm_dbg(&i915->drm,
+ "Invalid placement[%d], already occupied\n", slot);
+ return -EINVAL;
+ }
+
+ if (get_user(flags, &ext->flags))
+ return -EFAULT;
+
+ if (flags) {
+ drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags);
+ return -EINVAL;
+ }
+
+ for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+ err = check_user_mbz(&ext->mbz64[n]);
+ if (err)
+ return err;
+ }
+
+ if (width < 2) {
+ drm_dbg(&i915->drm, "Width (%d) < 2\n", width);
+ return -EINVAL;
+ }
+
+ if (num_siblings < 1) {
+ drm_dbg(&i915->drm, "Number siblings (%d) < 1\n",
+ num_siblings);
+ return -EINVAL;
+ }
+
+ siblings = kmalloc_array(num_siblings * width,
+ sizeof(*siblings),
+ GFP_KERNEL);
+ if (!siblings)
+ return -ENOMEM;
+
+ /* Create contexts / engines */
+ for (i = 0; i < width; ++i) {
+ intel_engine_mask_t current_mask = 0;
+ struct i915_engine_class_instance prev_engine;
+
+ for (j = 0; j < num_siblings; ++j) {
+ struct i915_engine_class_instance ci;
+
+ n = i * num_siblings + j;
+ if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
+ err = -EFAULT;
+ goto out_err;
+ }
+
+ siblings[n] =
+ intel_engine_lookup_user(i915, ci.engine_class,
+ ci.engine_instance);
+ if (!siblings[n]) {
+ drm_dbg(&i915->drm,
+ "Invalid sibling[%d]: { class:%d, inst:%d }\n",
+ n, ci.engine_class, ci.engine_instance);
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ if (n) {
+ if (prev_engine.engine_class !=
+ ci.engine_class) {
+ drm_dbg(&i915->drm,
+ "Mismatched class %d, %d\n",
+ prev_engine.engine_class,
+ ci.engine_class);
+ err = -EINVAL;
+ goto out_err;
+ }
+ }
+
+ prev_engine = ci;
+ current_mask |= siblings[n]->logical_mask;
+ }
+
+ if (i > 0) {
+ if (current_mask != prev_mask << 1) {
+ drm_dbg(&i915->drm,
+ "Non contiguous logical mask 0x%x, 0x%x\n",
+ prev_mask, current_mask);
+ err = -EINVAL;
+ goto out_err;
+ }
+ }
+ prev_mask = current_mask;
+ }
+
+ set->engines[slot].type = I915_GEM_ENGINE_TYPE_PARALLEL;
+ set->engines[slot].num_siblings = num_siblings;
+ set->engines[slot].width = width;
+ set->engines[slot].siblings = siblings;
+
+ return 0;
+
+out_err:
+ kfree(siblings);
+
+ return err;
+}
+
static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = {
[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance,
[I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond,
+ [I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT] =
+ set_proto_ctx_engines_parallel_submit,
};
static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv,
@@ -794,6 +932,7 @@ static int intel_context_set_gem(struct intel_context *ce,
GEM_BUG_ON(rcu_access_pointer(ce->gem_context));
RCU_INIT_POINTER(ce->gem_context, ctx);
+ GEM_BUG_ON(intel_context_is_pinned(ce));
ce->ring_size = SZ_16K;
i915_vm_put(ce->vm);
@@ -818,6 +957,25 @@ static int intel_context_set_gem(struct intel_context *ce,
return ret;
}
+static void __unpin_engines(struct i915_gem_engines *e, unsigned int count)
+{
+ while (count--) {
+ struct intel_context *ce = e->engines[count], *child;
+
+ if (!ce || !test_bit(CONTEXT_PERMA_PIN, &ce->flags))
+ continue;
+
+ for_each_child(ce, child)
+ intel_context_unpin(child);
+ intel_context_unpin(ce);
+ }
+}
+
+static void unpin_engines(struct i915_gem_engines *e)
+{
+ __unpin_engines(e, e->num_engines);
+}
+
static void __free_engines(struct i915_gem_engines *e, unsigned int count)
{
while (count--) {
@@ -933,6 +1091,40 @@ free_engines:
return err;
}
+static int perma_pin_contexts(struct intel_context *ce)
+{
+ struct intel_context *child;
+ int i = 0, j = 0, ret;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ ret = intel_context_pin(ce);
+ if (unlikely(ret))
+ return ret;
+
+ for_each_child(ce, child) {
+ ret = intel_context_pin(child);
+ if (unlikely(ret))
+ goto unwind;
+ ++i;
+ }
+
+ set_bit(CONTEXT_PERMA_PIN, &ce->flags);
+
+ return 0;
+
+unwind:
+ intel_context_unpin(ce);
+ for_each_child(ce, child) {
+ if (j++ < i)
+ intel_context_unpin(child);
+ else
+ break;
+ }
+
+ return ret;
+}
+
static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
unsigned int num_engines,
struct i915_gem_proto_engine *pe)
@@ -946,7 +1138,7 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
e->num_engines = num_engines;
for (n = 0; n < num_engines; n++) {
- struct intel_context *ce;
+ struct intel_context *ce, *child;
int ret;
switch (pe[n].type) {
@@ -956,7 +1148,13 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
case I915_GEM_ENGINE_TYPE_BALANCED:
ce = intel_engine_create_virtual(pe[n].siblings,
- pe[n].num_siblings);
+ pe[n].num_siblings, 0);
+ break;
+
+ case I915_GEM_ENGINE_TYPE_PARALLEL:
+ ce = intel_engine_create_parallel(pe[n].siblings,
+ pe[n].num_siblings,
+ pe[n].width);
break;
case I915_GEM_ENGINE_TYPE_INVALID:
@@ -977,6 +1175,30 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
err = ERR_PTR(ret);
goto free_engines;
}
+ for_each_child(ce, child) {
+ ret = intel_context_set_gem(child, ctx, pe->sseu);
+ if (ret) {
+ err = ERR_PTR(ret);
+ goto free_engines;
+ }
+ }
+
+ /*
+ * XXX: Must be done after calling intel_context_set_gem as that
+ * function changes the ring size. The ring is allocated when
+ * the context is pinned. If the ring size is changed after
+ * allocation we have a mismatch of the ring size and will cause
+ * the context to hang. Presumably with a bit of reordering we
+ * could move the perma-pin step to the backend function
+ * intel_engine_create_parallel.
+ */
+ if (pe[n].type == I915_GEM_ENGINE_TYPE_PARALLEL) {
+ ret = perma_pin_contexts(ce);
+ if (ret) {
+ err = ERR_PTR(ret);
+ goto free_engines;
+ }
+ }
}
return e;
@@ -1219,6 +1441,7 @@ static void context_close(struct i915_gem_context *ctx)
/* Flush any concurrent set_engines() */
mutex_lock(&ctx->engines_mutex);
+ unpin_engines(__context_engines_static(ctx));
engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1));
i915_gem_context_set_closed(ctx);
mutex_unlock(&ctx->engines_mutex);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index a627b09c4680..282cdb8a5c5a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -78,13 +78,16 @@ enum i915_gem_engine_type {
/** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */
I915_GEM_ENGINE_TYPE_BALANCED,
+
+ /** @I915_GEM_ENGINE_TYPE_PARALLEL: A parallel engine set */
+ I915_GEM_ENGINE_TYPE_PARALLEL,
};
/**
* struct i915_gem_proto_engine - prototype engine
*
* This struct describes an engine that a context may contain. Engines
- * have three types:
+ * have four types:
*
* - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they
* show up as a NULL in i915_gem_engines::engines[i] and any attempt to
@@ -97,6 +100,10 @@ enum i915_gem_engine_type {
*
* - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described
* i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings.
+ *
+ * - I915_GEM_ENGINE_TYPE_PARALLEL: A parallel submission engine set, described
+ * i915_gem_proto_engine::width, i915_gem_proto_engine::num_siblings, and
+ * i915_gem_proto_engine::siblings.
*/
struct i915_gem_proto_engine {
/** @type: Type of this engine */
@@ -105,10 +112,13 @@ struct i915_gem_proto_engine {
/** @engine: Engine, for physical */
struct intel_engine_cs *engine;
- /** @num_siblings: Number of balanced siblings */
+ /** @num_siblings: Number of balanced or parallel siblings */
unsigned int num_siblings;
- /** @siblings: Balanced siblings */
+ /** @width: Width of each sibling */
+ unsigned int width;
+
+ /** @siblings: Balanced siblings or num_siblings * width for parallel */
struct intel_engine_cs **siblings;
/** @sseu: Client-set SSEU parameters */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index afa34111de02..1adcd8e02d29 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -232,6 +232,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *pages;
unsigned int sg_page_sizes;
@@ -242,8 +243,11 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
if (IS_ERR(pages))
return PTR_ERR(pages);
- sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
+ /* XXX: consider doing a vmap flush or something */
+ if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj))
+ wbinvd_on_all_cpus();
+ sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
return 0;
@@ -301,7 +305,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
}
drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
- i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class, 0);
+ i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class,
+ I915_BO_ALLOC_USER);
obj->base.import_attach = attach;
obj->base.resv = dma_buf->resv;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 8b3a25bd93e6..4d7da07442f2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -246,17 +246,25 @@ struct i915_execbuffer {
struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
struct eb_vma *vma;
- struct intel_engine_cs *engine; /** engine to queue the request to */
+ struct intel_gt *gt; /* gt for the execbuf */
struct intel_context *context; /* logical state for the request */
struct i915_gem_context *gem_context; /** caller's context */
- struct i915_request *request; /** our request to build */
- struct eb_vma *batch; /** identity of the batch obj/vma */
+ /** our requests to build */
+ struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
+ /** identity of the batch obj/vma */
+ struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1];
struct i915_vma *trampoline; /** trampoline used for chaining */
+ /** used for excl fence in dma_resv objects when > 1 BB submitted */
+ struct dma_fence *composite_fence;
+
/** actual size of execobj[] as we may extend it for the cmdparser */
unsigned int buffer_count;
+ /* number of batches in execbuf IOCTL */
+ unsigned int num_batches;
+
/** list of vma not yet bound during reservation phase */
struct list_head unbound;
@@ -283,7 +291,8 @@ struct i915_execbuffer {
u64 invalid_flags; /** Set of execobj.flags that are invalid */
- u64 batch_len; /** Length of batch within object */
+ /** Length of batch within object */
+ u64 batch_len[MAX_ENGINE_INSTANCE + 1];
u32 batch_start_offset; /** Location within object of batch */
u32 batch_flags; /** Flags composed for emit_bb_start() */
struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
@@ -301,14 +310,13 @@ struct i915_execbuffer {
};
static int eb_parse(struct i915_execbuffer *eb);
-static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
- bool throttle);
+static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
static void eb_unpin_engine(struct i915_execbuffer *eb);
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
- return intel_engine_requires_cmd_parser(eb->engine) ||
- (intel_engine_using_cmd_parser(eb->engine) &&
+ return intel_engine_requires_cmd_parser(eb->context->engine) ||
+ (intel_engine_using_cmd_parser(eb->context->engine) &&
eb->args->batch_len);
}
@@ -535,11 +543,21 @@ eb_validate_vma(struct i915_execbuffer *eb,
return 0;
}
-static void
+static inline bool
+is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx)
+{
+ return eb->args->flags & I915_EXEC_BATCH_FIRST ?
+ buffer_idx < eb->num_batches :
+ buffer_idx >= eb->args->buffer_count - eb->num_batches;
+}
+
+static int
eb_add_vma(struct i915_execbuffer *eb,
- unsigned int i, unsigned batch_idx,
+ unsigned int *current_batch,
+ unsigned int i,
struct i915_vma *vma)
{
+ struct drm_i915_private *i915 = eb->i915;
struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
struct eb_vma *ev = &eb->vma[i];
@@ -566,15 +584,43 @@ eb_add_vma(struct i915_execbuffer *eb,
* Note that actual hangs have only been observed on gen7, but for
* paranoia do it everywhere.
*/
- if (i == batch_idx) {
+ if (is_batch_buffer(eb, i)) {
if (entry->relocation_count &&
!(ev->flags & EXEC_OBJECT_PINNED))
ev->flags |= __EXEC_OBJECT_NEEDS_BIAS;
if (eb->reloc_cache.has_fence)
ev->flags |= EXEC_OBJECT_NEEDS_FENCE;
- eb->batch = ev;
+ eb->batches[*current_batch] = ev;
+
+ if (unlikely(ev->flags & EXEC_OBJECT_WRITE)) {
+ drm_dbg(&i915->drm,
+ "Attempting to use self-modifying batch buffer\n");
+ return -EINVAL;
+ }
+
+ if (range_overflows_t(u64,
+ eb->batch_start_offset,
+ eb->args->batch_len,
+ ev->vma->size)) {
+ drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
+ return -EINVAL;
+ }
+
+ if (eb->args->batch_len == 0)
+ eb->batch_len[*current_batch] = ev->vma->size -
+ eb->batch_start_offset;
+ else
+ eb->batch_len[*current_batch] = eb->args->batch_len;
+ if (unlikely(eb->batch_len[*current_batch] == 0)) { /* impossible! */
+ drm_dbg(&i915->drm, "Invalid batch length\n");
+ return -EINVAL;
+ }
+
+ ++*current_batch;
}
+
+ return 0;
}
static inline int use_cpu_reloc(const struct reloc_cache *cache,
@@ -718,14 +764,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
} while (1);
}
-static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
-{
- if (eb->args->flags & I915_EXEC_BATCH_FIRST)
- return 0;
- else
- return eb->buffer_count - 1;
-}
-
static int eb_select_context(struct i915_execbuffer *eb)
{
struct i915_gem_context *ctx;
@@ -846,9 +884,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
static int eb_lookup_vmas(struct i915_execbuffer *eb)
{
- struct drm_i915_private *i915 = eb->i915;
- unsigned int batch = eb_batch_index(eb);
- unsigned int i;
+ unsigned int i, current_batch = 0;
int err = 0;
INIT_LIST_HEAD(&eb->relocs);
@@ -868,7 +904,9 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
goto err;
}
- eb_add_vma(eb, i, batch, vma);
+ err = eb_add_vma(eb, &current_batch, i, vma);
+ if (err)
+ return err;
if (i915_gem_object_is_userptr(vma->obj)) {
err = i915_gem_object_userptr_submit_init(vma->obj);
@@ -891,26 +929,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
}
}
- if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
- drm_dbg(&i915->drm,
- "Attempting to use self-modifying batch buffer\n");
- return -EINVAL;
- }
-
- if (range_overflows_t(u64,
- eb->batch_start_offset, eb->batch_len,
- eb->batch->vma->size)) {
- drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
- return -EINVAL;
- }
-
- if (eb->batch_len == 0)
- eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
- if (unlikely(eb->batch_len == 0)) { /* impossible! */
- drm_dbg(&i915->drm, "Invalid batch length\n");
- return -EINVAL;
- }
-
return 0;
err:
@@ -1643,8 +1661,7 @@ static int eb_reinit_userptr(struct i915_execbuffer *eb)
return 0;
}
-static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
- struct i915_request *rq)
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
{
bool have_copy = false;
struct eb_vma *ev;
@@ -1660,21 +1677,6 @@ repeat:
eb_release_vmas(eb, false);
i915_gem_ww_ctx_fini(&eb->ww);
- if (rq) {
- /* nonblocking is always false */
- if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT) < 0) {
- i915_request_put(rq);
- rq = NULL;
-
- err = -EINTR;
- goto err_relock;
- }
-
- i915_request_put(rq);
- rq = NULL;
- }
-
/*
* We take 3 passes through the slowpatch.
*
@@ -1701,28 +1703,21 @@ repeat:
if (!err)
err = eb_reinit_userptr(eb);
-err_relock:
i915_gem_ww_ctx_init(&eb->ww, true);
if (err)
goto out;
/* reacquire the objects */
repeat_validate:
- rq = eb_pin_engine(eb, false);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- rq = NULL;
+ err = eb_pin_engine(eb, false);
+ if (err)
goto err;
- }
-
- /* We didn't throttle, should be NULL */
- GEM_WARN_ON(rq);
err = eb_validate_vmas(eb);
if (err)
goto err;
- GEM_BUG_ON(!eb->batch);
+ GEM_BUG_ON(!eb->batches[0]);
list_for_each_entry(ev, &eb->relocs, reloc_link) {
if (!have_copy) {
@@ -1786,46 +1781,23 @@ out:
}
}
- if (rq)
- i915_request_put(rq);
-
return err;
}
static int eb_relocate_parse(struct i915_execbuffer *eb)
{
int err;
- struct i915_request *rq = NULL;
bool throttle = true;
retry:
- rq = eb_pin_engine(eb, throttle);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- rq = NULL;
+ err = eb_pin_engine(eb, throttle);
+ if (err) {
if (err != -EDEADLK)
return err;
goto err;
}
- if (rq) {
- bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
-
- /* Need to drop all locks now for throttling, take slowpath */
- err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0);
- if (err == -ETIME) {
- if (nonblock) {
- err = -EWOULDBLOCK;
- i915_request_put(rq);
- goto err;
- }
- goto slow;
- }
- i915_request_put(rq);
- rq = NULL;
- }
-
/* only throttle once, even if we didn't need to throttle */
throttle = false;
@@ -1865,7 +1837,7 @@ err:
return err;
slow:
- err = eb_relocate_parse_slow(eb, rq);
+ err = eb_relocate_parse_slow(eb);
if (err)
/*
* If the user expects the execobject.offset and
@@ -1879,11 +1851,40 @@ slow:
return err;
}
+/*
+ * Using two helper loops for the order of which requests / batches are created
+ * and added the to backend. Requests are created in order from the parent to
+ * the last child. Requests are added in the reverse order, from the last child
+ * to parent. This is done for locking reasons as the timeline lock is acquired
+ * during request creation and released when the request is added to the
+ * backend. To make lockdep happy (see intel_context_timeline_lock) this must be
+ * the ordering.
+ */
+#define for_each_batch_create_order(_eb, _i) \
+ for ((_i) = 0; (_i) < (_eb)->num_batches; ++(_i))
+#define for_each_batch_add_order(_eb, _i) \
+ BUILD_BUG_ON(!typecheck(int, _i)); \
+ for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i))
+
+static struct i915_request *
+eb_find_first_request_added(struct i915_execbuffer *eb)
+{
+ int i;
+
+ for_each_batch_add_order(eb, i)
+ if (eb->requests[i])
+ return eb->requests[i];
+
+ GEM_BUG_ON("Request not found");
+
+ return NULL;
+}
+
static int eb_move_to_gpu(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
unsigned int i = count;
- int err = 0;
+ int err = 0, j;
while (i--) {
struct eb_vma *ev = &eb->vma[i];
@@ -1896,11 +1897,17 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
if (flags & EXEC_OBJECT_CAPTURE) {
struct i915_capture_list *capture;
- capture = kmalloc(sizeof(*capture), GFP_KERNEL);
- if (capture) {
- capture->next = eb->request->capture_list;
- capture->vma = vma;
- eb->request->capture_list = capture;
+ for_each_batch_create_order(eb, j) {
+ if (!eb->requests[j])
+ break;
+
+ capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+ if (capture) {
+ capture->next =
+ eb->requests[j]->capture_list;
+ capture->vma = vma;
+ eb->requests[j]->capture_list = capture;
+ }
}
}
@@ -1915,20 +1922,43 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
* !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
* but gcc's optimiser doesn't handle that as well and emits
* two jumps instead of one. Maybe one day...
+ *
+ * FIXME: There is also sync flushing in set_pages(), which
+ * serves a different purpose(some of the time at least).
+ *
+ * We should consider:
+ *
+ * 1. Rip out the async flush code.
+ *
+ * 2. Or make the sync flushing use the async clflush path
+ * using mandatory fences underneath. Currently the below
+ * async flush happens after we bind the object.
*/
if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
if (i915_gem_clflush_object(obj, 0))
flags &= ~EXEC_OBJECT_ASYNC;
}
+ /* We only need to await on the first request */
if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
err = i915_request_await_object
- (eb->request, obj, flags & EXEC_OBJECT_WRITE);
+ (eb_find_first_request_added(eb), obj,
+ flags & EXEC_OBJECT_WRITE);
}
- if (err == 0)
- err = i915_vma_move_to_active(vma, eb->request,
- flags | __EXEC_OBJECT_NO_RESERVE);
+ for_each_batch_add_order(eb, j) {
+ if (err)
+ break;
+ if (!eb->requests[j])
+ continue;
+
+ err = _i915_vma_move_to_active(vma, eb->requests[j],
+ j ? NULL :
+ eb->composite_fence ?
+ eb->composite_fence :
+ &eb->requests[j]->fence,
+ flags | __EXEC_OBJECT_NO_RESERVE);
+ }
}
#ifdef CONFIG_MMU_NOTIFIER
@@ -1959,11 +1989,16 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
goto err_skip;
/* Unconditionally flush any chipset caches (for streaming writes). */
- intel_gt_chipset_flush(eb->engine->gt);
+ intel_gt_chipset_flush(eb->gt);
return 0;
err_skip:
- i915_request_set_error_once(eb->request, err);
+ for_each_batch_create_order(eb, j) {
+ if (!eb->requests[j])
+ break;
+
+ i915_request_set_error_once(eb->requests[j], err);
+ }
return err;
}
@@ -2058,14 +2093,17 @@ static int eb_parse(struct i915_execbuffer *eb)
int err;
if (!eb_use_cmdparser(eb)) {
- batch = eb_dispatch_secure(eb, eb->batch->vma);
+ batch = eb_dispatch_secure(eb, eb->batches[0]->vma);
if (IS_ERR(batch))
return PTR_ERR(batch);
goto secure_batch;
}
- len = eb->batch_len;
+ if (intel_context_is_parallel(eb->context))
+ return -EINVAL;
+
+ len = eb->batch_len[0];
if (!CMDPARSER_USES_GGTT(eb->i915)) {
/*
* ppGTT backed shadow buffers must be mapped RO, to prevent
@@ -2079,11 +2117,11 @@ static int eb_parse(struct i915_execbuffer *eb)
} else {
len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
}
- if (unlikely(len < eb->batch_len)) /* last paranoid check of overflow */
+ if (unlikely(len < eb->batch_len[0])) /* last paranoid check of overflow */
return -EINVAL;
if (!pool) {
- pool = intel_gt_get_buffer_pool(eb->engine->gt, len,
+ pool = intel_gt_get_buffer_pool(eb->gt, len,
I915_MAP_WB);
if (IS_ERR(pool))
return PTR_ERR(pool);
@@ -2108,7 +2146,7 @@ static int eb_parse(struct i915_execbuffer *eb)
trampoline = shadow;
shadow = shadow_batch_pin(eb, pool->obj,
- &eb->engine->gt->ggtt->vm,
+ &eb->gt->ggtt->vm,
PIN_GLOBAL);
if (IS_ERR(shadow)) {
err = PTR_ERR(shadow);
@@ -2130,26 +2168,29 @@ static int eb_parse(struct i915_execbuffer *eb)
if (err)
goto err_trampoline;
- err = intel_engine_cmd_parser(eb->engine,
- eb->batch->vma,
+ err = intel_engine_cmd_parser(eb->context->engine,
+ eb->batches[0]->vma,
eb->batch_start_offset,
- eb->batch_len,
+ eb->batch_len[0],
shadow, trampoline);
if (err)
goto err_unpin_batch;
- eb->batch = &eb->vma[eb->buffer_count++];
- eb->batch->vma = i915_vma_get(shadow);
- eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
+ eb->batches[0] = &eb->vma[eb->buffer_count++];
+ eb->batches[0]->vma = i915_vma_get(shadow);
+ eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN;
eb->trampoline = trampoline;
eb->batch_start_offset = 0;
secure_batch:
if (batch) {
- eb->batch = &eb->vma[eb->buffer_count++];
- eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
- eb->batch->vma = i915_vma_get(batch);
+ if (intel_context_is_parallel(eb->context))
+ return -EINVAL;
+
+ eb->batches[0] = &eb->vma[eb->buffer_count++];
+ eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN;
+ eb->batches[0]->vma = i915_vma_get(batch);
}
return 0;
@@ -2165,19 +2206,18 @@ err:
return err;
}
-static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
+static int eb_request_submit(struct i915_execbuffer *eb,
+ struct i915_request *rq,
+ struct i915_vma *batch,
+ u64 batch_len)
{
int err;
- if (intel_context_nopreempt(eb->context))
- __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags);
-
- err = eb_move_to_gpu(eb);
- if (err)
- return err;
+ if (intel_context_nopreempt(rq->context))
+ __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
- err = i915_reset_gen7_sol_offsets(eb->request);
+ err = i915_reset_gen7_sol_offsets(rq);
if (err)
return err;
}
@@ -2188,26 +2228,26 @@ static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
* allows us to determine if the batch is still waiting on the GPU
* or actually running by checking the breadcrumb.
*/
- if (eb->engine->emit_init_breadcrumb) {
- err = eb->engine->emit_init_breadcrumb(eb->request);
+ if (rq->context->engine->emit_init_breadcrumb) {
+ err = rq->context->engine->emit_init_breadcrumb(rq);
if (err)
return err;
}
- err = eb->engine->emit_bb_start(eb->request,
- batch->node.start +
- eb->batch_start_offset,
- eb->batch_len,
- eb->batch_flags);
+ err = rq->context->engine->emit_bb_start(rq,
+ batch->node.start +
+ eb->batch_start_offset,
+ batch_len,
+ eb->batch_flags);
if (err)
return err;
if (eb->trampoline) {
+ GEM_BUG_ON(intel_context_is_parallel(rq->context));
GEM_BUG_ON(eb->batch_start_offset);
- err = eb->engine->emit_bb_start(eb->request,
- eb->trampoline->node.start +
- eb->batch_len,
- 0, 0);
+ err = rq->context->engine->emit_bb_start(rq,
+ eb->trampoline->node.start +
+ batch_len, 0, 0);
if (err)
return err;
}
@@ -2215,6 +2255,27 @@ static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
return 0;
}
+static int eb_submit(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+ int err;
+
+ err = eb_move_to_gpu(eb);
+
+ for_each_batch_create_order(eb, i) {
+ if (!eb->requests[i])
+ break;
+
+ trace_i915_request_queue(eb->requests[i], eb->batch_flags);
+ if (!err)
+ err = eb_request_submit(eb, eb->requests[i],
+ eb->batches[i]->vma,
+ eb->batch_len[i]);
+ }
+
+ return err;
+}
+
static int num_vcs_engines(const struct drm_i915_private *i915)
{
return hweight_long(VDBOX_MASK(&i915->gt));
@@ -2280,26 +2341,11 @@ static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel
return i915_request_get(rq);
}
-static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
+static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
+ bool throttle)
{
- struct intel_context *ce = eb->context;
struct intel_timeline *tl;
struct i915_request *rq = NULL;
- int err;
-
- GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
-
- if (unlikely(intel_context_is_banned(ce)))
- return ERR_PTR(-EIO);
-
- /*
- * Pinning the contexts may generate requests in order to acquire
- * GGTT space, so do this first before we reserve a seqno for
- * ourselves.
- */
- err = intel_context_pin_ww(ce, &eb->ww);
- if (err)
- return ERR_PTR(err);
/*
* Take a local wakeref for preparing to dispatch the execbuf as
@@ -2310,33 +2356,108 @@ static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throt
* taken on the engine, and the parent device.
*/
tl = intel_context_timeline_lock(ce);
- if (IS_ERR(tl)) {
- intel_context_unpin(ce);
- return ERR_CAST(tl);
- }
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
intel_context_enter(ce);
if (throttle)
rq = eb_throttle(eb, ce);
intel_context_timeline_unlock(tl);
+ if (rq) {
+ bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
+ long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT;
+
+ if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+ timeout) < 0) {
+ i915_request_put(rq);
+
+ tl = intel_context_timeline_lock(ce);
+ intel_context_exit(ce);
+ intel_context_timeline_unlock(tl);
+
+ if (nonblock)
+ return -EWOULDBLOCK;
+ else
+ return -EINTR;
+ }
+ i915_request_put(rq);
+ }
+
+ return 0;
+}
+
+static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
+{
+ struct intel_context *ce = eb->context, *child;
+ int err;
+ int i = 0, j = 0;
+
+ GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
+
+ if (unlikely(intel_context_is_banned(ce)))
+ return -EIO;
+
+ /*
+ * Pinning the contexts may generate requests in order to acquire
+ * GGTT space, so do this first before we reserve a seqno for
+ * ourselves.
+ */
+ err = intel_context_pin_ww(ce, &eb->ww);
+ if (err)
+ return err;
+ for_each_child(ce, child) {
+ err = intel_context_pin_ww(child, &eb->ww);
+ GEM_BUG_ON(err); /* perma-pinned should incr a counter */
+ }
+
+ for_each_child(ce, child) {
+ err = eb_pin_timeline(eb, child, throttle);
+ if (err)
+ goto unwind;
+ ++i;
+ }
+ err = eb_pin_timeline(eb, ce, throttle);
+ if (err)
+ goto unwind;
+
eb->args->flags |= __EXEC_ENGINE_PINNED;
- return rq;
+ return 0;
+
+unwind:
+ for_each_child(ce, child) {
+ if (j++ < i) {
+ mutex_lock(&child->timeline->mutex);
+ intel_context_exit(child);
+ mutex_unlock(&child->timeline->mutex);
+ }
+ }
+ for_each_child(ce, child)
+ intel_context_unpin(child);
+ intel_context_unpin(ce);
+ return err;
}
static void eb_unpin_engine(struct i915_execbuffer *eb)
{
- struct intel_context *ce = eb->context;
- struct intel_timeline *tl = ce->timeline;
+ struct intel_context *ce = eb->context, *child;
if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
return;
eb->args->flags &= ~__EXEC_ENGINE_PINNED;
- mutex_lock(&tl->mutex);
+ for_each_child(ce, child) {
+ mutex_lock(&child->timeline->mutex);
+ intel_context_exit(child);
+ mutex_unlock(&child->timeline->mutex);
+
+ intel_context_unpin(child);
+ }
+
+ mutex_lock(&ce->timeline->mutex);
intel_context_exit(ce);
- mutex_unlock(&tl->mutex);
+ mutex_unlock(&ce->timeline->mutex);
intel_context_unpin(ce);
}
@@ -2387,7 +2508,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb)
static int
eb_select_engine(struct i915_execbuffer *eb)
{
- struct intel_context *ce;
+ struct intel_context *ce, *child;
unsigned int idx;
int err;
@@ -2400,6 +2521,20 @@ eb_select_engine(struct i915_execbuffer *eb)
if (IS_ERR(ce))
return PTR_ERR(ce);
+ if (intel_context_is_parallel(ce)) {
+ if (eb->buffer_count < ce->parallel.number_children + 1) {
+ intel_context_put(ce);
+ return -EINVAL;
+ }
+ if (eb->batch_start_offset || eb->args->batch_len) {
+ intel_context_put(ce);
+ return -EINVAL;
+ }
+ }
+ eb->num_batches = ce->parallel.number_children + 1;
+
+ for_each_child(ce, child)
+ intel_context_get(child);
intel_gt_pm_get(ce->engine->gt);
if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
@@ -2407,6 +2542,13 @@ eb_select_engine(struct i915_execbuffer *eb)
if (err)
goto err;
}
+ for_each_child(ce, child) {
+ if (!test_bit(CONTEXT_ALLOC_BIT, &child->flags)) {
+ err = intel_context_alloc_state(child);
+ if (err)
+ goto err;
+ }
+ }
/*
* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
@@ -2417,7 +2559,7 @@ eb_select_engine(struct i915_execbuffer *eb)
goto err;
eb->context = ce;
- eb->engine = ce->engine;
+ eb->gt = ce->engine->gt;
/*
* Make sure engine pool stays alive even if we call intel_context_put
@@ -2428,6 +2570,8 @@ eb_select_engine(struct i915_execbuffer *eb)
err:
intel_gt_pm_put(ce->engine->gt);
+ for_each_child(ce, child)
+ intel_context_put(child);
intel_context_put(ce);
return err;
}
@@ -2435,7 +2579,11 @@ err:
static void
eb_put_engine(struct i915_execbuffer *eb)
{
- intel_gt_pm_put(eb->engine->gt);
+ struct intel_context *child;
+
+ intel_gt_pm_put(eb->gt);
+ for_each_child(eb->context, child)
+ intel_context_put(child);
intel_context_put(eb->context);
}
@@ -2658,7 +2806,8 @@ static void put_fence_array(struct eb_fence *fences, int num_fences)
}
static int
-await_fence_array(struct i915_execbuffer *eb)
+await_fence_array(struct i915_execbuffer *eb,
+ struct i915_request *rq)
{
unsigned int n;
int err;
@@ -2672,8 +2821,7 @@ await_fence_array(struct i915_execbuffer *eb)
if (!eb->fences[n].dma_fence)
continue;
- err = i915_request_await_dma_fence(eb->request,
- eb->fences[n].dma_fence);
+ err = i915_request_await_dma_fence(rq, eb->fences[n].dma_fence);
if (err < 0)
return err;
}
@@ -2681,9 +2829,9 @@ await_fence_array(struct i915_execbuffer *eb)
return 0;
}
-static void signal_fence_array(const struct i915_execbuffer *eb)
+static void signal_fence_array(const struct i915_execbuffer *eb,
+ struct dma_fence * const fence)
{
- struct dma_fence * const fence = &eb->request->fence;
unsigned int n;
for (n = 0; n < eb->num_fences; n++) {
@@ -2731,9 +2879,9 @@ static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
break;
}
-static int eb_request_add(struct i915_execbuffer *eb, int err)
+static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq,
+ int err, bool last_parallel)
{
- struct i915_request *rq = eb->request;
struct intel_timeline * const tl = i915_request_timeline(rq);
struct i915_sched_attr attr = {};
struct i915_request *prev;
@@ -2755,6 +2903,17 @@ static int eb_request_add(struct i915_execbuffer *eb, int err)
err = -ENOENT; /* override any transient errors */
}
+ if (intel_context_is_parallel(eb->context)) {
+ if (err) {
+ __i915_request_skip(rq);
+ set_bit(I915_FENCE_FLAG_SKIP_PARALLEL,
+ &rq->fence.flags);
+ }
+ if (last_parallel)
+ set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL,
+ &rq->fence.flags);
+ }
+
__i915_request_queue(rq, &attr);
/* Try to clean up the client's timeline after submitting the request */
@@ -2766,6 +2925,25 @@ static int eb_request_add(struct i915_execbuffer *eb, int err)
return err;
}
+static int eb_requests_add(struct i915_execbuffer *eb, int err)
+{
+ int i;
+
+ /*
+ * We iterate in reverse order of creation to release timeline mutexes in
+ * same order.
+ */
+ for_each_batch_add_order(eb, i) {
+ struct i915_request *rq = eb->requests[i];
+
+ if (!rq)
+ continue;
+ err |= eb_request_add(eb, rq, err, i == 0);
+ }
+
+ return err;
+}
+
static const i915_user_extension_fn execbuf_extensions[] = {
[DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
};
@@ -2792,6 +2970,185 @@ parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
eb);
}
+static void eb_requests_get(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+
+ for_each_batch_create_order(eb, i) {
+ if (!eb->requests[i])
+ break;
+
+ i915_request_get(eb->requests[i]);
+ }
+}
+
+static void eb_requests_put(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+
+ for_each_batch_create_order(eb, i) {
+ if (!eb->requests[i])
+ break;
+
+ i915_request_put(eb->requests[i]);
+ }
+}
+
+static struct sync_file *
+eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd)
+{
+ struct sync_file *out_fence = NULL;
+ struct dma_fence_array *fence_array;
+ struct dma_fence **fences;
+ unsigned int i;
+
+ GEM_BUG_ON(!intel_context_is_parent(eb->context));
+
+ fences = kmalloc_array(eb->num_batches, sizeof(*fences), GFP_KERNEL);
+ if (!fences)
+ return ERR_PTR(-ENOMEM);
+
+ for_each_batch_create_order(eb, i) {
+ fences[i] = &eb->requests[i]->fence;
+ __set_bit(I915_FENCE_FLAG_COMPOSITE,
+ &eb->requests[i]->fence.flags);
+ }
+
+ fence_array = dma_fence_array_create(eb->num_batches,
+ fences,
+ eb->context->parallel.fence_context,
+ eb->context->parallel.seqno,
+ false);
+ if (!fence_array) {
+ kfree(fences);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Move ownership to the dma_fence_array created above */
+ for_each_batch_create_order(eb, i)
+ dma_fence_get(fences[i]);
+
+ if (out_fence_fd != -1) {
+ out_fence = sync_file_create(&fence_array->base);
+ /* sync_file now owns fence_arry, drop creation ref */
+ dma_fence_put(&fence_array->base);
+ if (!out_fence)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ eb->composite_fence = &fence_array->base;
+
+ return out_fence;
+}
+
+static struct sync_file *
+eb_fences_add(struct i915_execbuffer *eb, struct i915_request *rq,
+ struct dma_fence *in_fence, int out_fence_fd)
+{
+ struct sync_file *out_fence = NULL;
+ int err;
+
+ if (unlikely(eb->gem_context->syncobj)) {
+ struct dma_fence *fence;
+
+ fence = drm_syncobj_fence_get(eb->gem_context->syncobj);
+ err = i915_request_await_dma_fence(rq, fence);
+ dma_fence_put(fence);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ if (in_fence) {
+ if (eb->args->flags & I915_EXEC_FENCE_SUBMIT)
+ err = i915_request_await_execution(rq, in_fence);
+ else
+ err = i915_request_await_dma_fence(rq, in_fence);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
+
+ if (eb->fences) {
+ err = await_fence_array(eb, rq);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ if (intel_context_is_parallel(eb->context)) {
+ out_fence = eb_composite_fence_create(eb, out_fence_fd);
+ if (IS_ERR(out_fence))
+ return ERR_PTR(-ENOMEM);
+ } else if (out_fence_fd != -1) {
+ out_fence = sync_file_create(&rq->fence);
+ if (!out_fence)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return out_fence;
+}
+
+static struct intel_context *
+eb_find_context(struct i915_execbuffer *eb, unsigned int context_number)
+{
+ struct intel_context *child;
+
+ if (likely(context_number == 0))
+ return eb->context;
+
+ for_each_child(eb->context, child)
+ if (!--context_number)
+ return child;
+
+ GEM_BUG_ON("Context not found");
+
+ return NULL;
+}
+
+static struct sync_file *
+eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
+ int out_fence_fd)
+{
+ struct sync_file *out_fence = NULL;
+ unsigned int i;
+
+ for_each_batch_create_order(eb, i) {
+ /* Allocate a request for this batch buffer nice and early. */
+ eb->requests[i] = i915_request_create(eb_find_context(eb, i));
+ if (IS_ERR(eb->requests[i])) {
+ out_fence = ERR_PTR(PTR_ERR(eb->requests[i]));
+ eb->requests[i] = NULL;
+ return out_fence;
+ }
+
+ /*
+ * Only the first request added (committed to backend) has to
+ * take the in fences into account as all subsequent requests
+ * will have fences inserted inbetween them.
+ */
+ if (i + 1 == eb->num_batches) {
+ out_fence = eb_fences_add(eb, eb->requests[i],
+ in_fence, out_fence_fd);
+ if (IS_ERR(out_fence))
+ return out_fence;
+ }
+
+ /*
+ * Whilst this request exists, batch_obj will be on the
+ * active_list, and so will hold the active reference. Only when
+ * this request is retired will the batch_obj be moved onto
+ * the inactive_list and lose its active reference. Hence we do
+ * not need to explicitly hold another reference here.
+ */
+ eb->requests[i]->batch = eb->batches[i]->vma;
+ if (eb->batch_pool) {
+ GEM_BUG_ON(intel_context_is_parallel(eb->context));
+ intel_gt_buffer_pool_mark_active(eb->batch_pool,
+ eb->requests[i]);
+ }
+ }
+
+ return out_fence;
+}
+
static int
i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_file *file,
@@ -2802,7 +3159,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
struct sync_file *out_fence = NULL;
- struct i915_vma *batch;
int out_fence_fd = -1;
int err;
@@ -2826,12 +3182,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.buffer_count = args->buffer_count;
eb.batch_start_offset = args->batch_start_offset;
- eb.batch_len = args->batch_len;
eb.trampoline = NULL;
eb.fences = NULL;
eb.num_fences = 0;
+ memset(eb.requests, 0, sizeof(struct i915_request *) *
+ ARRAY_SIZE(eb.requests));
+ eb.composite_fence = NULL;
+
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
if (GRAPHICS_VER(i915) >= 11)
@@ -2915,70 +3274,25 @@ i915_gem_do_execbuffer(struct drm_device *dev,
ww_acquire_done(&eb.ww.ctx);
- batch = eb.batch->vma;
-
- /* Allocate a request for this batch buffer nice and early. */
- eb.request = i915_request_create(eb.context);
- if (IS_ERR(eb.request)) {
- err = PTR_ERR(eb.request);
- goto err_vma;
- }
-
- if (unlikely(eb.gem_context->syncobj)) {
- struct dma_fence *fence;
-
- fence = drm_syncobj_fence_get(eb.gem_context->syncobj);
- err = i915_request_await_dma_fence(eb.request, fence);
- dma_fence_put(fence);
- if (err)
- goto err_ext;
- }
-
- if (in_fence) {
- if (args->flags & I915_EXEC_FENCE_SUBMIT)
- err = i915_request_await_execution(eb.request,
- in_fence);
- else
- err = i915_request_await_dma_fence(eb.request,
- in_fence);
- if (err < 0)
- goto err_request;
- }
-
- if (eb.fences) {
- err = await_fence_array(&eb);
- if (err)
+ out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
+ if (IS_ERR(out_fence)) {
+ err = PTR_ERR(out_fence);
+ if (eb.requests[0])
goto err_request;
+ else
+ goto err_vma;
}
- if (out_fence_fd != -1) {
- out_fence = sync_file_create(&eb.request->fence);
- if (!out_fence) {
- err = -ENOMEM;
- goto err_request;
- }
- }
-
- /*
- * Whilst this request exists, batch_obj will be on the
- * active_list, and so will hold the active reference. Only when this
- * request is retired will the the batch_obj be moved onto the
- * inactive_list and lose its active reference. Hence we do not need
- * to explicitly hold another reference here.
- */
- eb.request->batch = batch;
- if (eb.batch_pool)
- intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request);
-
- trace_i915_request_queue(eb.request, eb.batch_flags);
- err = eb_submit(&eb, batch);
+ err = eb_submit(&eb);
err_request:
- i915_request_get(eb.request);
- err = eb_request_add(&eb, err);
+ eb_requests_get(&eb);
+ err = eb_requests_add(&eb, err);
if (eb.fences)
- signal_fence_array(&eb);
+ signal_fence_array(&eb, eb.composite_fence ?
+ eb.composite_fence :
+ &eb.requests[0]->fence);
if (out_fence) {
if (err == 0) {
@@ -2993,10 +3307,15 @@ err_request:
if (unlikely(eb.gem_context->syncobj)) {
drm_syncobj_replace_fence(eb.gem_context->syncobj,
- &eb.request->fence);
+ eb.composite_fence ?
+ eb.composite_fence :
+ &eb.requests[0]->fence);
}
- i915_request_put(eb.request);
+ if (!out_fence && eb.composite_fence)
+ dma_fence_put(eb.composite_fence);
+
+ eb_requests_put(&eb);
err_vma:
eb_release_vmas(&eb, true);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index e5ae9c06510c..a57a6b7013c2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -134,6 +134,8 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
internal_free_pages(pages);
obj->mm.dirty = false;
+
+ __start_cpu_write(obj);
}
static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 76ce6a1500bc..1e426a42a36c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -128,6 +128,32 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
}
+bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ /*
+ * This is purely from a security perspective, so we simply don't care
+ * about non-userspace objects being able to bypass the LLC.
+ */
+ if (!(obj->flags & I915_BO_ALLOC_USER))
+ return false;
+
+ /*
+ * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
+ * possible for userspace to bypass the GTT caching bits set by the
+ * kernel, as per the given object cache_level. This is troublesome
+ * since the heavy flush we apply when first gathering the pages is
+ * skipped if the kernel thinks the object is coherent with the GPU. As
+ * a result it might be possible to bypass the cache and read the
+ * contents of the page directly, which could be stale data. If it's
+ * just a case of userspace shooting themselves in the foot then so be
+ * it, but since i915 takes the stance of always zeroing memory before
+ * handing it to userspace, we need to prevent this.
+ */
+ return IS_JSL_EHL(i915);
+}
+
static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{
struct drm_i915_gem_object *obj = to_intel_bo(gem);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 9df3ee60604e..59201801cec5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -514,6 +514,7 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
+bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 7c3da4e3e737..da85169006d4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -427,6 +427,33 @@ struct drm_i915_gem_object {
* can freely bypass the CPU cache when touching the pages with the GPU,
* where the kernel is completely unaware. On such platform we need
* apply the sledgehammer-on-acquire regardless of the @cache_coherent.
+ *
+ * Special care is taken on non-LLC platforms, to prevent potential
+ * information leak. The driver currently ensures:
+ *
+ * 1. All userspace objects, by default, have @cache_level set as
+ * I915_CACHE_NONE. The only exception is userptr objects, where we
+ * instead force I915_CACHE_LLC, but we also don't allow userspace to
+ * ever change the @cache_level for such objects. Another special case
+ * is dma-buf, which doesn't rely on @cache_dirty, but there we
+ * always do a forced flush when acquiring the pages, if there is a
+ * chance that the pages can be read directly from main memory with
+ * the GPU.
+ *
+ * 2. All I915_CACHE_NONE objects have @cache_dirty initially true.
+ *
+ * 3. All swapped-out objects(i.e shmem) have @cache_dirty set to
+ * true.
+ *
+ * 4. The @cache_dirty is never freely reset before the initial
+ * flush, even if userspace adjusts the @cache_level through the
+ * i915_gem_set_caching_ioctl.
+ *
+ * 5. All @cache_dirty objects(including swapped-in) are initially
+ * flushed with a synchronous call to drm_clflush_sg in
+ * __i915_gem_object_set_pages. The @cache_dirty can be freely reset
+ * at this point. All further asynchronous clfushes are never security
+ * critical, i.e userspace is free to race against itself.
*/
unsigned int cache_dirty:1;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 11f072193f3b..d77da59fae04 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -182,22 +182,7 @@ rebuild_st:
if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_do_bit_17_swizzle(obj, st);
- /*
- * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
- * possible for userspace to bypass the GTT caching bits set by the
- * kernel, as per the given object cache_level. This is troublesome
- * since the heavy flush we apply when first gathering the pages is
- * skipped if the kernel thinks the object is coherent with the GPU. As
- * a result it might be possible to bypass the cache and read the
- * contents of the page directly, which could be stale data. If it's
- * just a case of userspace shooting themselves in the foot then so be
- * it, but since i915 takes the stance of always zeroing memory before
- * handing it to userspace, we need to prevent this.
- *
- * By setting cache_dirty here we make the clflush in set_pages
- * unconditional on such platforms.
- */
- if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
+ if (i915_gem_object_can_bypass_llc(obj))
obj->cache_dirty = true;
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
@@ -301,6 +286,8 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
struct sg_table *pages,
bool needs_clflush)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
if (obj->mm.madv == I915_MADV_DONTNEED)
@@ -312,6 +299,16 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
drm_clflush_sg(pages);
__start_cpu_write(obj);
+ /*
+ * On non-LLC platforms, force the flush-on-acquire if this is ever
+ * swapped-in. Our async flush path is not trust worthy enough yet(and
+ * happens in the wrong order), and with some tricks it's conceivable
+ * for userspace to change the cache-level to I915_CACHE_NONE after the
+ * pages are swapped-in, and since execbuf binds the object before doing
+ * the async flush, we have a race window.
+ */
+ if (!HAS_LLC(i915))
+ obj->cache_dirty = true;
}
void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 8ea0fa665e53..3173c9f9a040 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -165,8 +165,11 @@ alloc_table:
goto err;
}
- sg_page_sizes = i915_sg_dma_sizes(st->sgl);
+ WARN_ON_ONCE(!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE));
+ if (i915_gem_object_can_bypass_llc(obj))
+ obj->cache_dirty = true;
+ sg_page_sizes = i915_sg_dma_sizes(st->sgl);
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
return 0;
@@ -546,7 +549,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
return -ENOMEM;
drm_gem_private_object_init(dev, &obj->base, args->user_size);
- i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, 0);
+ i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class,
+ I915_BO_ALLOC_USER);
obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE;
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->write_domain = I915_GEM_DOMAIN_CPU;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 41d0680f3bd7..b2003133deaf 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -136,6 +136,8 @@ static void put_huge_pages(struct drm_i915_gem_object *obj,
huge_pages_free_pages(pages);
obj->mm.dirty = false;
+
+ __start_cpu_write(obj);
}
static const struct drm_i915_gem_object_ops huge_page_ops = {
@@ -152,6 +154,7 @@ huge_pages_object(struct drm_i915_private *i915,
{
static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
GEM_BUG_ON(!size);
GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));
@@ -173,7 +176,9 @@ huge_pages_object(struct drm_i915_private *i915,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
- obj->cache_level = I915_CACHE_NONE;
+
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
obj->mm.page_mask = page_mask;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index ecbcbb86ae1e..8402ed925a69 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -17,13 +17,20 @@
#include "huge_gem_object.h"
#include "mock_context.h"
+enum client_tiling {
+ CLIENT_TILING_LINEAR,
+ CLIENT_TILING_X,
+ CLIENT_TILING_Y,
+ CLIENT_NUM_TILING_TYPES
+};
+
#define WIDTH 512
#define HEIGHT 32
struct blit_buffer {
struct i915_vma *vma;
u32 start_val;
- u32 tiling;
+ enum client_tiling tiling;
};
struct tiled_blits {
@@ -53,9 +60,9 @@ static int prepare_blit(const struct tiled_blits *t,
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
- if (src->tiling == I915_TILING_Y)
+ if (src->tiling == CLIENT_TILING_Y)
cmd |= BCS_SRC_Y;
- if (dst->tiling == I915_TILING_Y)
+ if (dst->tiling == CLIENT_TILING_Y)
cmd |= BCS_DST_Y;
*cs++ = cmd;
@@ -172,7 +179,7 @@ static int tiled_blits_create_buffers(struct tiled_blits *t,
t->buffers[i].vma = vma;
t->buffers[i].tiling =
- i915_prandom_u32_max_state(I915_TILING_Y + 1, prng);
+ i915_prandom_u32_max_state(CLIENT_TILING_Y + 1, prng);
}
return 0;
@@ -197,17 +204,17 @@ static u64 swizzle_bit(unsigned int bit, u64 offset)
static u64 tiled_offset(const struct intel_gt *gt,
u64 v,
unsigned int stride,
- unsigned int tiling)
+ enum client_tiling tiling)
{
unsigned int swizzle;
u64 x, y;
- if (tiling == I915_TILING_NONE)
+ if (tiling == CLIENT_TILING_LINEAR)
return v;
y = div64_u64_rem(v, stride, &x);
- if (tiling == I915_TILING_X) {
+ if (tiling == CLIENT_TILING_X) {
v = div64_u64_rem(y, 8, &y) * stride * 8;
v += y * 512;
v += div64_u64_rem(x, 512, &x) << 12;
@@ -244,12 +251,12 @@ static u64 tiled_offset(const struct intel_gt *gt,
return v;
}
-static const char *repr_tiling(int tiling)
+static const char *repr_tiling(enum client_tiling tiling)
{
switch (tiling) {
- case I915_TILING_NONE: return "linear";
- case I915_TILING_X: return "X";
- case I915_TILING_Y: return "Y";
+ case CLIENT_TILING_LINEAR: return "linear";
+ case CLIENT_TILING_X: return "X";
+ case CLIENT_TILING_Y: return "Y";
default: return "unknown";
}
}