summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c52
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h56
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h73
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c66
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_debugfs.c55
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_debugfs.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c41
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_llc.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c12
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c29
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h54
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c24
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h34
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c1446
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c179
27 files changed, 1963 insertions, 272 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index e9a0cad5c34d..488acd39ff67 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -240,6 +240,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
if (err)
goto err_post_unpin;
+ intel_engine_pm_might_get(ce->engine);
+
if (unlikely(intel_context_is_closed(ce))) {
err = -ENOENT;
goto err_unlock;
@@ -362,8 +364,8 @@ static int __intel_context_active(struct i915_active *active)
return 0;
}
-static int sw_fence_dummy_notify(struct i915_sw_fence *sf,
- enum i915_sw_fence_notify state)
+static int __i915_sw_fence_call
+sw_fence_dummy_notify(struct i915_sw_fence *sf, enum i915_sw_fence_notify state)
{
return NOTIFY_DONE;
}
@@ -399,6 +401,10 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
ce->guc_id.id = GUC_INVALID_LRC_ID;
INIT_LIST_HEAD(&ce->guc_id.link);
+ INIT_LIST_HEAD(&ce->destroyed_link);
+
+ INIT_LIST_HEAD(&ce->parallel.child_list);
+
/*
* Initialize fence to be complete as this is expected to be complete
* unless there is a pending schedule disable outstanding.
@@ -413,10 +419,17 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
void intel_context_fini(struct intel_context *ce)
{
+ struct intel_context *child, *next;
+
if (ce->timeline)
intel_timeline_put(ce->timeline);
i915_vm_put(ce->vm);
+ /* Need to put the creation ref for the children */
+ if (intel_context_is_parent(ce))
+ for_each_child_safe(ce, child, next)
+ intel_context_put(child);
+
mutex_destroy(&ce->pin_mutex);
i915_active_fini(&ce->active);
i915_sw_fence_fini(&ce->guc_state.blocked);
@@ -515,24 +528,53 @@ retry:
struct i915_request *intel_context_find_active_request(struct intel_context *ce)
{
+ struct intel_context *parent = intel_context_to_parent(ce);
struct i915_request *rq, *active = NULL;
unsigned long flags;
GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));
- spin_lock_irqsave(&ce->guc_state.lock, flags);
- list_for_each_entry_reverse(rq, &ce->guc_state.requests,
+ /*
+ * We search the parent list to find an active request on the submitted
+ * context. The parent list contains the requests for all the contexts
+ * in the relationship so we have to do a compare of each request's
+ * context.
+ */
+ spin_lock_irqsave(&parent->guc_state.lock, flags);
+ list_for_each_entry_reverse(rq, &parent->guc_state.requests,
sched.link) {
+ if (rq->context != ce)
+ continue;
if (i915_request_completed(rq))
break;
active = rq;
}
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ spin_unlock_irqrestore(&parent->guc_state.lock, flags);
return active;
}
+void intel_context_bind_parent_child(struct intel_context *parent,
+ struct intel_context *child)
+{
+ /*
+ * Callers responsibility to validate that this function is used
+ * correctly but we use GEM_BUG_ON here ensure that they do.
+ */
+ GEM_BUG_ON(!intel_engine_uses_guc(parent->engine));
+ GEM_BUG_ON(intel_context_is_pinned(parent));
+ GEM_BUG_ON(intel_context_is_child(parent));
+ GEM_BUG_ON(intel_context_is_pinned(child));
+ GEM_BUG_ON(intel_context_is_child(child));
+ GEM_BUG_ON(intel_context_is_parent(child));
+
+ parent->parallel.child_index = parent->parallel.number_children++;
+ list_add_tail(&child->parallel.child_link,
+ &parent->parallel.child_list);
+ child->parallel.parent = parent;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index c41098950746..246c37d72cd7 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -44,6 +44,54 @@ void intel_context_free(struct intel_context *ce);
int intel_context_reconfigure_sseu(struct intel_context *ce,
const struct intel_sseu sseu);
+#define PARENT_SCRATCH_SIZE PAGE_SIZE
+
+static inline bool intel_context_is_child(struct intel_context *ce)
+{
+ return !!ce->parallel.parent;
+}
+
+static inline bool intel_context_is_parent(struct intel_context *ce)
+{
+ return !!ce->parallel.number_children;
+}
+
+static inline bool intel_context_is_pinned(struct intel_context *ce);
+
+static inline struct intel_context *
+intel_context_to_parent(struct intel_context *ce)
+{
+ if (intel_context_is_child(ce)) {
+ /*
+ * The parent holds ref count to the child so it is always safe
+ * for the parent to access the child, but the child has a
+ * pointer to the parent without a ref. To ensure this is safe
+ * the child should only access the parent pointer while the
+ * parent is pinned.
+ */
+ GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
+
+ return ce->parallel.parent;
+ } else {
+ return ce;
+ }
+}
+
+static inline bool intel_context_is_parallel(struct intel_context *ce)
+{
+ return intel_context_is_child(ce) || intel_context_is_parent(ce);
+}
+
+void intel_context_bind_parent_child(struct intel_context *parent,
+ struct intel_context *child);
+
+#define for_each_child(parent, ce)\
+ list_for_each_entry(ce, &(parent)->parallel.child_list,\
+ parallel.child_link)
+#define for_each_child_safe(parent, ce, cn)\
+ list_for_each_entry_safe(ce, cn, &(parent)->parallel.child_list,\
+ parallel.child_link)
+
/**
* intel_context_lock_pinned - Stablises the 'pinned' status of the HW context
* @ce - the context
@@ -193,7 +241,13 @@ intel_context_timeline_lock(struct intel_context *ce)
struct intel_timeline *tl = ce->timeline;
int err;
- err = mutex_lock_interruptible(&tl->mutex);
+ if (intel_context_is_parent(ce))
+ err = mutex_lock_interruptible_nested(&tl->mutex, 0);
+ else if (intel_context_is_child(ce))
+ err = mutex_lock_interruptible_nested(&tl->mutex,
+ ce->parallel.child_index + 1);
+ else
+ err = mutex_lock_interruptible(&tl->mutex);
if (err)
return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 12252c411159..9e0177dc5484 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -55,9 +55,13 @@ struct intel_context_ops {
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);
- /* virtual engine/context interface */
+ /* virtual/parallel engine/context interface */
struct intel_context *(*create_virtual)(struct intel_engine_cs **engine,
- unsigned int count);
+ unsigned int count,
+ unsigned long flags);
+ struct intel_context *(*create_parallel)(struct intel_engine_cs **engines,
+ unsigned int num_siblings,
+ unsigned int width);
struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine,
unsigned int sibling);
};
@@ -113,6 +117,7 @@ struct intel_context {
#define CONTEXT_NOPREEMPT 8
#define CONTEXT_LRCA_DIRTY 9
#define CONTEXT_GUC_INIT 10
+#define CONTEXT_PERMA_PIN 11
struct {
u64 timeout_us;
@@ -197,22 +202,80 @@ struct intel_context {
struct {
/**
* @id: handle which is used to uniquely identify this context
- * with the GuC, protected by guc->contexts_lock
+ * with the GuC, protected by guc->submission_state.lock
*/
u16 id;
/**
* @ref: the number of references to the guc_id, when
* transitioning in and out of zero protected by
- * guc->contexts_lock
+ * guc->submission_state.lock
*/
atomic_t ref;
/**
* @link: in guc->guc_id_list when the guc_id has no refs but is
- * still valid, protected by guc->contexts_lock
+ * still valid, protected by guc->submission_state.lock
*/
struct list_head link;
} guc_id;
+ /**
+ * @destroyed_link: link in guc->submission_state.destroyed_contexts, in
+ * list when context is pending to be destroyed (deregistered with the
+ * GuC), protected by guc->submission_state.lock
+ */
+ struct list_head destroyed_link;
+
+ /** @parallel: sub-structure for parallel submission members */
+ struct {
+ union {
+ /**
+ * @child_list: parent's list of children
+ * contexts, no protection as immutable after context
+ * creation
+ */
+ struct list_head child_list;
+ /**
+ * @child_link: child's link into parent's list of
+ * children
+ */
+ struct list_head child_link;
+ };
+ /** @parent: pointer to parent if child */
+ struct intel_context *parent;
+ /**
+ * @last_rq: last request submitted on a parallel context, used
+ * to insert submit fences between requests in the parallel
+ * context
+ */
+ struct i915_request *last_rq;
+ /**
+ * @fence_context: fence context composite fence when doing
+ * parallel submission
+ */
+ u64 fence_context;
+ /**
+ * @seqno: seqno for composite fence when doing parallel
+ * submission
+ */
+ u32 seqno;
+ /** @number_children: number of children if parent */
+ u8 number_children;
+ /** @child_index: index into child_list if child */
+ u8 child_index;
+ /** @guc: GuC specific members for parallel submission */
+ struct {
+ /** @wqi_head: head pointer in work queue */
+ u16 wqi_head;
+ /** @wqi_tail: tail pointer in work queue */
+ u16 wqi_tail;
+ /**
+ * @parent_page: page in context state (ce->state) used
+ * by parent for work queue, process descriptor
+ */
+ u8 parent_page;
+ } guc;
+ } parallel;
+
#ifdef CONFIG_DRM_I915_SELFTEST
/**
* @drop_schedule_enable: Force drop of schedule enable G2H for selftest
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 452248884ef1..08559ace0ada 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -2,6 +2,7 @@
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_
+#include <asm/cacheflush.h>
#include <drm/drm_util.h>
#include <linux/hashtable.h>
@@ -281,9 +282,19 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
return intel_engine_has_preemption(engine);
}
+#define FORCE_VIRTUAL BIT(0)
struct intel_context *
intel_engine_create_virtual(struct intel_engine_cs **siblings,
- unsigned int count);
+ unsigned int count, unsigned long flags);
+
+static inline struct intel_context *
+intel_engine_create_parallel(struct intel_engine_cs **engines,
+ unsigned int num_engines,
+ unsigned int width)
+{
+ GEM_BUG_ON(!engines[0]->cops->create_parallel);
+ return engines[0]->cops->create_parallel(engines, num_engines, width);
+}
static inline bool
intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 2ae57e4656a3..ff6753ccb129 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
GEM_DEBUG_WARN_ON(iir);
}
-static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
+static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
+ u8 logical_instance)
{
const struct engine_info *info = &intel_engines[id];
struct drm_i915_private *i915 = gt->i915;
@@ -335,6 +336,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->class = info->class;
engine->instance = info->instance;
+ engine->logical_mask = BIT(logical_instance);
__sprint_engine_name(engine);
engine->props.heartbeat_interval_ms =
@@ -588,6 +590,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
return info->engine_mask;
}
+static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
+ u8 class, const u8 *map, u8 num_instances)
+{
+ int i, j;
+ u8 current_logical_id = 0;
+
+ for (j = 0; j < num_instances; ++j) {
+ for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
+ if (!HAS_ENGINE(gt, i) ||
+ intel_engines[i].class != class)
+ continue;
+
+ if (intel_engines[i].instance == map[j]) {
+ logical_ids[intel_engines[i].instance] =
+ current_logical_id++;
+ break;
+ }
+ }
+ }
+}
+
+static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
+{
+ int i;
+ u8 map[MAX_ENGINE_INSTANCE + 1];
+
+ for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
+ map[i] = i;
+ populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
+}
+
/**
* intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
* @gt: pointer to struct intel_gt
@@ -599,7 +632,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
const unsigned int engine_mask = init_engine_mask(gt);
unsigned int mask = 0;
- unsigned int i;
+ unsigned int i, class;
+ u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
int err;
drm_WARN_ON(&i915->drm, engine_mask == 0);
@@ -609,15 +643,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
if (i915_inject_probe_failure(i915))
return -ENODEV;
- for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
- if (!HAS_ENGINE(gt, i))
- continue;
+ for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
+ setup_logical_ids(gt, logical_ids, class);
- err = intel_engine_setup(gt, i);
- if (err)
- goto cleanup;
+ for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
+ u8 instance = intel_engines[i].instance;
+
+ if (intel_engines[i].class != class ||
+ !HAS_ENGINE(gt, i))
+ continue;
- mask |= BIT(i);
+ err = intel_engine_setup(gt, i,
+ logical_ids[instance]);
+ if (err)
+ goto cleanup;
+
+ mask |= BIT(i);
+ }
}
/*
@@ -1911,16 +1953,16 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
struct intel_context *
intel_engine_create_virtual(struct intel_engine_cs **siblings,
- unsigned int count)
+ unsigned int count, unsigned long flags)
{
if (count == 0)
return ERR_PTR(-EINVAL);
- if (count == 1)
+ if (count == 1 && !(flags & FORCE_VIRTUAL))
return intel_context_create(siblings[0]);
GEM_BUG_ON(!siblings[0]->cops->create_virtual);
- return siblings[0]->cops->create_virtual(siblings, count);
+ return siblings[0]->cops->create_virtual(siblings, count, flags);
}
struct i915_request *
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index dacd62773735..a1334b48dde7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -162,6 +162,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
unsigned long flags;
bool result = true;
+ /*
+ * This is execlist specific behaviour intended to ensure the GPU is
+ * idle by switching to a known 'safe' context. With GuC submission, the
+ * same idle guarantee is achieved by other means (disabling
+ * scheduling). Further, switching to a 'safe' context has no effect
+ * with GuC submission as the scheduler can just switch back again.
+ *
+ * FIXME: Move this backend scheduler specific behaviour into the
+ * scheduler backend.
+ */
+ if (intel_engine_uses_guc(engine))
+ return true;
+
/* GPU is pointing to the void, as good as in the kernel context. */
if (intel_gt_is_wedged(engine->gt))
return true;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 8520c595f5e1..d68675925b79 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -6,9 +6,11 @@
#ifndef INTEL_ENGINE_PM_H
#define INTEL_ENGINE_PM_H
+#include "i915_drv.h"
#include "i915_request.h"
#include "intel_engine_types.h"
#include "intel_wakeref.h"
+#include "intel_gt_pm.h"
static inline bool
intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
@@ -16,6 +18,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
return intel_wakeref_is_active(&engine->wakeref);
}
+static inline void __intel_engine_pm_get(struct intel_engine_cs *engine)
+{
+ __intel_wakeref_get(&engine->wakeref);
+}
+
static inline void intel_engine_pm_get(struct intel_engine_cs *engine)
{
intel_wakeref_get(&engine->wakeref);
@@ -26,6 +33,21 @@ static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine)
return intel_wakeref_get_if_active(&engine->wakeref);
}
+static inline void intel_engine_pm_might_get(struct intel_engine_cs *engine)
+{
+ if (!intel_engine_is_virtual(engine)) {
+ intel_wakeref_might_get(&engine->wakeref);
+ } else {
+ struct intel_gt *gt = engine->gt;
+ struct intel_engine_cs *tengine;
+ intel_engine_mask_t tmp, mask = engine->mask;
+
+ for_each_engine_masked(tengine, gt, mask, tmp)
+ intel_wakeref_might_get(&tengine->wakeref);
+ }
+ intel_gt_pm_might_get(engine->gt);
+}
+
static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
{
intel_wakeref_put(&engine->wakeref);
@@ -47,6 +69,21 @@ static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
intel_wakeref_unlock_wait(&engine->wakeref);
}
+static inline void intel_engine_pm_might_put(struct intel_engine_cs *engine)
+{
+ if (!intel_engine_is_virtual(engine)) {
+ intel_wakeref_might_put(&engine->wakeref);
+ } else {
+ struct intel_gt *gt = engine->gt;
+ struct intel_engine_cs *tengine;
+ intel_engine_mask_t tmp, mask = engine->mask;
+
+ for_each_engine_masked(tengine, gt, mask, tmp)
+ intel_wakeref_might_put(&tengine->wakeref);
+ }
+ intel_gt_pm_might_put(engine->gt);
+}
+
static inline struct i915_request *
intel_engine_create_kernel_request(struct intel_engine_cs *engine)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9167ce52487c..e0f773585c29 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -269,6 +269,13 @@ struct intel_engine_cs {
unsigned int guc_id;
intel_engine_mask_t mask;
+ /**
+ * @logical_mask: logical mask of engine, reported to user space via
+ * query IOCTL and used to communicate with the GuC in logical space.
+ * The logical instance of a physical engine can change based on product
+ * and fusing.
+ */
+ intel_engine_mask_t logical_mask;
u8 class;
u8 instance;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 73a79c2acd3a..bedb80057046 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -201,7 +201,8 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
}
static struct intel_context *
-execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
+execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
+ unsigned long flags);
static struct i915_request *
__active_request(const struct intel_timeline * const tl,
@@ -3784,7 +3785,8 @@ unlock:
}
static struct intel_context *
-execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
+execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
+ unsigned long flags)
{
struct virtual_engine *ve;
unsigned int n;
@@ -3877,6 +3879,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
ve->siblings[ve->num_siblings++] = sibling;
ve->base.mask |= sibling->mask;
+ ve->base.logical_mask |= sibling->logical_mask;
/*
* All physical engines must be compatible for their emission
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
index 1fe19ccd2794..f103664b71d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
@@ -13,6 +13,59 @@
#include "pxp/intel_pxp_debugfs.h"
#include "uc/intel_uc_debugfs.h"
+int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val)
+{
+ int ret = intel_gt_terminally_wedged(gt);
+
+ switch (ret) {
+ case -EIO:
+ *val = 1;
+ return 0;
+ case 0:
+ *val = 0;
+ return 0;
+ default:
+ return ret;
+ }
+}
+
+int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val)
+{
+ /* Flush any previous reset before applying for a new one */
+ wait_event(gt->reset.queue,
+ !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
+
+ intel_gt_handle_error(gt, val, I915_ERROR_CAPTURE,
+ "Manually reset engine mask to %llx", val);
+ return 0;
+}
+
+/*
+ * keep the interface clean where the first parameter
+ * is a 'struct intel_gt *' instead of 'void *'
+ */
+static int __intel_gt_debugfs_reset_show(void *data, u64 *val)
+{
+ return intel_gt_debugfs_reset_show(data, val);
+}
+
+static int __intel_gt_debugfs_reset_store(void *data, u64 val)
+{
+ return intel_gt_debugfs_reset_store(data, val);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(reset_fops, __intel_gt_debugfs_reset_show,
+ __intel_gt_debugfs_reset_store, "%llu\n");
+
+static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root)
+{
+ static const struct intel_gt_debugfs_file files[] = {
+ { "reset", &reset_fops, NULL },
+ };
+
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
+}
+
void intel_gt_debugfs_register(struct intel_gt *gt)
{
struct dentry *root;
@@ -24,6 +77,8 @@ void intel_gt_debugfs_register(struct intel_gt *gt)
if (IS_ERR(root))
return;
+ gt_debugfs_register(gt, root);
+
intel_gt_engines_debugfs_register(gt, root);
intel_gt_pm_debugfs_register(gt, root);
intel_sseu_debugfs_register(gt, root);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
index 8b6fca09897c..e307ceb99031 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
@@ -35,4 +35,8 @@ void intel_gt_debugfs_register_files(struct dentry *root,
const struct intel_gt_debugfs_file *files,
unsigned long count, void *data);
+/* functions that need to be accessed by the upper level non-gt interfaces */
+int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val);
+int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val);
+
#endif /* INTEL_GT_DEBUGFS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index d0588d8aaa44..bc898df7a48c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -31,6 +31,11 @@ static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
return intel_wakeref_get_if_active(&gt->wakeref);
}
+static inline void intel_gt_pm_might_get(struct intel_gt *gt)
+{
+ intel_wakeref_might_get(&gt->wakeref);
+}
+
static inline void intel_gt_pm_put(struct intel_gt *gt)
{
intel_wakeref_put(&gt->wakeref);
@@ -41,6 +46,15 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
intel_wakeref_put_async(&gt->wakeref);
}
+static inline void intel_gt_pm_might_put(struct intel_gt *gt)
+{
+ intel_wakeref_might_put(&gt->wakeref);
+}
+
+#define with_intel_gt_pm(gt, tmp) \
+ for (tmp = 1, intel_gt_pm_get(gt); tmp; \
+ intel_gt_pm_put(gt), tmp = 0)
+
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index b3ddb9106125..404dfa7673c6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -20,6 +20,46 @@
#include "intel_uncore.h"
#include "vlv_sideband.h"
+int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
+{
+ atomic_inc(&gt->user_wakeref);
+ intel_gt_pm_get(gt);
+ if (GRAPHICS_VER(gt->i915) >= 6)
+ intel_uncore_forcewake_user_get(gt->uncore);
+
+ return 0;
+}
+
+int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
+{
+ if (GRAPHICS_VER(gt->i915) >= 6)
+ intel_uncore_forcewake_user_put(gt->uncore);
+ intel_gt_pm_put(gt);
+ atomic_dec(&gt->user_wakeref);
+
+ return 0;
+}
+
+static int forcewake_user_open(struct inode *inode, struct file *file)
+{
+ struct intel_gt *gt = inode->i_private;
+
+ return intel_gt_pm_debugfs_forcewake_user_open(gt);
+}
+
+static int forcewake_user_release(struct inode *inode, struct file *file)
+{
+ struct intel_gt *gt = inode->i_private;
+
+ return intel_gt_pm_debugfs_forcewake_user_release(gt);
+}
+
+static const struct file_operations forcewake_user_fops = {
+ .owner = THIS_MODULE,
+ .open = forcewake_user_open,
+ .release = forcewake_user_release,
+};
+
static int fw_domains_show(struct seq_file *m, void *data)
{
struct intel_gt *gt = m->private;
@@ -628,6 +668,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
{ "drpc", &drpc_fops, NULL },
{ "frequency", &frequency_fops, NULL },
{ "forcewake", &fw_domains_fops, NULL },
+ { "forcewake_user", &forcewake_user_fops, NULL},
{ "llc", &llc_fops, llc_eval },
{ "rps_boost", &rps_boost_fops, rps_eval },
};
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
index 2b824289582b..a8457887ec65 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
@@ -13,4 +13,8 @@ struct drm_printer;
void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root);
void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m);
+/* functions that need to be accessed by the upper level non-gt interfaces */
+int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt);
+int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt);
+
#endif /* INTEL_GT_PM_DEBUGFS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c
index 9ca88f8ac5dd..08d7d5ae263a 100644
--- a/drivers/gpu/drm/i915/gt/intel_llc.c
+++ b/drivers/gpu/drm/i915/gt/intel_llc.c
@@ -3,6 +3,7 @@
* Copyright © 2019 Intel Corporation
*/
+#include <asm/tsc.h>
#include <linux/cpufreq.h>
#include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3ef9eaf8c50e..56156cf18c41 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
context_size += PAGE_SIZE;
}
+ if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
+ ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
+ context_size += PARENT_SCRATCH_SIZE;
+ }
+
obj = i915_gem_object_create_lmem(engine->i915, context_size,
I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj))
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 593524195707..586dca1731ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -292,7 +292,7 @@ static void xcs_sanitize(struct intel_engine_cs *engine)
sanitize_hwsp(engine);
/* And scrub the dirty cachelines for the HWSP */
- clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+ drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
intel_engine_reset_pinned_contexts(engine);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 1257f4f11e66..438bbc7b8147 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -64,7 +64,7 @@ intel_timeline_pin_map(struct intel_timeline *timeline)
timeline->hwsp_map = vaddr;
timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
- clflush(vaddr + ofs);
+ drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);
return 0;
}
@@ -225,7 +225,7 @@ void intel_timeline_reset_seqno(const struct intel_timeline *tl)
memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
WRITE_ONCE(*hwsp_seqno, tl->seqno);
- clflush(hwsp_seqno);
+ drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
}
void intel_timeline_enter(struct intel_timeline *tl)
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 25a8c4f62b0d..b367ecfa42de 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -3733,7 +3733,7 @@ static int nop_virtual_engine(struct intel_gt *gt,
GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
for (n = 0; n < nctx; n++) {
- ve[n] = intel_engine_create_virtual(siblings, nsibling);
+ ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve[n])) {
err = PTR_ERR(ve[n]);
nctx = n;
@@ -3929,7 +3929,7 @@ static int mask_virtual_engine(struct intel_gt *gt,
* restrict it to our desired engine within the virtual engine.
*/
- ve = intel_engine_create_virtual(siblings, nsibling);
+ ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_close;
@@ -4060,7 +4060,7 @@ static int slicein_virtual_engine(struct intel_gt *gt,
i915_request_add(rq);
}
- ce = intel_engine_create_virtual(siblings, nsibling);
+ ce = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
@@ -4112,7 +4112,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt,
/* XXX We do not handle oversubscription and fairness with normal rq */
for (n = 0; n < nsibling; n++) {
- ce = intel_engine_create_virtual(siblings, nsibling);
+ ce = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
@@ -4214,7 +4214,7 @@ static int preserved_virtual_engine(struct intel_gt *gt,
if (err)
goto out_scratch;
- ve = intel_engine_create_virtual(siblings, nsibling);
+ ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_scratch;
@@ -4354,7 +4354,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
if (igt_spinner_init(&spin, gt))
return -ENOMEM;
- ve = intel_engine_create_virtual(siblings, nsibling);
+ ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_spin;
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 8ff582222aff..ba10bd374cee 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -142,6 +142,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+ INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
INTEL_GUC_ACTION_LIMIT
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 8f8182bf7c11..6e228343e8cb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -756,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p)
}
}
}
+
+void intel_guc_write_barrier(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
+ /*
+ * Ensure intel_uncore_write_fw can be used rather than
+ * intel_uncore_write.
+ */
+ GEM_BUG_ON(guc->send_regs.fw_domains);
+
+ /*
+ * This register is used by the i915 and GuC for MMIO based
+ * communication. Once we are in this code CTBs are the only
+ * method the i915 uses to communicate with the GuC so it is
+ * safe to write to this register (a value of 0 is NOP for MMIO
+ * communication). If we ever start mixing CTBs and MMIOs a new
+ * register will have to be chosen. This function is also used
+ * to enforce ordering of a work queue item write and an update
+ * to the process descriptor. When a work queue is being used,
+ * CTBs are also the only mechanism of communication.
+ */
+ intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
+ } else {
+ /* wmb() sufficient for a barrier if in smem */
+ wmb();
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 5dd174babf7a..31cf9fb48c7e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -46,6 +46,15 @@ struct intel_guc {
* submitted until the stalled request is processed.
*/
struct i915_request *stalled_request;
+ /**
+ * @submission_stall_reason: reason why submission is stalled
+ */
+ enum {
+ STALL_NONE,
+ STALL_REGISTER_CONTEXT,
+ STALL_MOVE_LRC_TAIL,
+ STALL_ADD_REQUEST,
+ } submission_stall_reason;
/* intel_guc_recv interrupt related state */
/** @irq_lock: protects GuC irq state */
@@ -71,16 +80,41 @@ struct intel_guc {
} interrupts;
/**
- * @contexts_lock: protects guc_ids, guc_id_list, ce->guc_id.id, and
- * ce->guc_id.ref when transitioning in and out of zero
- */
- spinlock_t contexts_lock;
- /** @guc_ids: used to allocate unique ce->guc_id.id values */
- struct ida guc_ids;
- /**
- * @guc_id_list: list of intel_context with valid guc_ids but no refs
+ * @submission_state: sub-structure for submission state protected by
+ * single lock
*/
- struct list_head guc_id_list;
+ struct {
+ /**
+ * @lock: protects everything in submission_state,
+ * ce->guc_id.id, and ce->guc_id.ref when transitioning in and
+ * out of zero
+ */
+ spinlock_t lock;
+ /**
+ * @guc_ids: used to allocate new guc_ids, single-lrc
+ */
+ struct ida guc_ids;
+ /**
+ * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+ */
+ unsigned long *guc_ids_bitmap;
+ /**
+ * @guc_id_list: list of intel_context with valid guc_ids but no
+ * refs
+ */
+ struct list_head guc_id_list;
+ /**
+ * @destroyed_contexts: list of contexts waiting to be destroyed
+ * (deregistered with the GuC)
+ */
+ struct list_head destroyed_contexts;
+ /**
+ * @destroyed_worker: worker to deregister contexts, need as we
+ * need to take a GT PM reference and can't from destroy
+ * function as it might be in an atomic context (no sleeping)
+ */
+ struct work_struct destroyed_worker;
+ } submission_state;
/**
* @submission_supported: tracks whether we support GuC submission on
@@ -342,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc);
void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
+void intel_guc_write_barrier(struct intel_guc *guc);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 2c6ea64af7ec..621c893a009f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
for_each_engine(engine, gt, id) {
u8 guc_class = engine_class_to_guc_class(engine->class);
- system_info->mapping_table[guc_class][engine->instance] =
+ system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
engine->instance;
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 0a3504bc0b61..a0cc34be7b56 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -383,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct)
return ++ct->requests.last_fence;
}
-static void write_barrier(struct intel_guc_ct *ct)
-{
- struct intel_guc *guc = ct_to_guc(ct);
- struct intel_gt *gt = guc_to_gt(guc);
-
- if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
- GEM_BUG_ON(guc->send_regs.fw_domains);
- /*
- * This register is used by the i915 and GuC for MMIO based
- * communication. Once we are in this code CTBs are the only
- * method the i915 uses to communicate with the GuC so it is
- * safe to write to this register (a value of 0 is NOP for MMIO
- * communication). If we ever start mixing CTBs and MMIOs a new
- * register will have to be chosen.
- */
- intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
- } else {
- /* wmb() sufficient for a barrier if in smem */
- wmb();
- }
-}
-
static int ct_write(struct intel_guc_ct *ct,
const u32 *action,
u32 len /* in dwords */,
@@ -474,7 +452,7 @@ static int ct_write(struct intel_guc_ct *ct,
* make sure H2G buffer update and LRC tail update (if this triggering a
* submission) are visible before updating the descriptor tail
*/
- write_barrier(ct);
+ intel_guc_write_barrier(ct_to_guc(ct));
/* update local copies */
ctb->tail = tail;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index fa4be13c8854..722933e26347 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -52,27 +52,27 @@
#define GUC_DOORBELL_INVALID 256
-#define GUC_WQ_SIZE (PAGE_SIZE * 2)
-
-/* Work queue item header definitions */
+/*
+ * Work queue item header definitions
+ *
+ * Work queue is circular buffer used to submit complex (multi-lrc) submissions
+ * to the GuC. A work queue item is an entry in the circular buffer.
+ */
#define WQ_STATUS_ACTIVE 1
#define WQ_STATUS_SUSPENDED 2
#define WQ_STATUS_CMD_ERROR 3
#define WQ_STATUS_ENGINE_ID_NOT_USED 4
#define WQ_STATUS_SUSPENDED_FROM_RESET 5
-#define WQ_TYPE_SHIFT 0
-#define WQ_TYPE_BATCH_BUF (0x1 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_PSEUDO (0x2 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_INORDER (0x3 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_NOOP (0x4 << WQ_TYPE_SHIFT)
-#define WQ_TARGET_SHIFT 10
-#define WQ_LEN_SHIFT 16
-#define WQ_NO_WCFLUSH_WAIT (1 << 27)
-#define WQ_PRESENT_WORKLOAD (1 << 28)
-
-#define WQ_RING_TAIL_SHIFT 20
-#define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */
-#define WQ_RING_TAIL_MASK (WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT)
+#define WQ_TYPE_BATCH_BUF 0x1
+#define WQ_TYPE_PSEUDO 0x2
+#define WQ_TYPE_INORDER 0x3
+#define WQ_TYPE_NOOP 0x4
+#define WQ_TYPE_MULTI_LRC 0x5
+#define WQ_TYPE_MASK GENMASK(7, 0)
+#define WQ_LEN_MASK GENMASK(26, 16)
+
+#define WQ_GUC_ID_MASK GENMASK(15, 0)
+#define WQ_RING_TAIL_MASK GENMASK(28, 18)
#define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0)
#define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1)
@@ -186,7 +186,7 @@ struct guc_process_desc {
u32 wq_status;
u32 engine_presence;
u32 priority;
- u32 reserved[30];
+ u32 reserved[36];
} __packed;
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index ba0de35f6323..d7710debcd47 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -11,6 +11,7 @@
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_heartbeat.h"
+#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
@@ -68,7 +69,7 @@
* fence is used to stall all requests associated with this guc_id until the
* corresponding G2H returns indicating the guc_id has been deregistered.
*
- * guc_ids:
+ * submission_state.guc_ids:
* Unique number associated with private GuC context data passed in during
* context registration / submission / deregistration. 64k available. Simple ida
* is used for allocation.
@@ -89,9 +90,9 @@
* sched_engine can be submitting at a time. Currently only one sched_engine is
* used for all of GuC submission but that could change in the future.
*
- * guc->contexts_lock
- * Protects guc_id allocation for the given GuC, i.e. only one context can be
- * doing guc_id allocation operations at a time for each GuC in the system.
+ * guc->submission_state.lock
+ * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
+ * list.
*
* ce->guc_state.lock
* Protects everything under ce->guc_state. Ensures that a context is in the
@@ -103,7 +104,7 @@
*
* Lock ordering rules:
* sched_engine->lock -> ce->guc_state.lock
- * guc->contexts_lock -> ce->guc_state.lock
+ * guc->submission_state.lock -> ce->guc_state.lock
*
* Reset races:
* When a full GT reset is triggered it is assumed that some G2H responses to
@@ -124,11 +125,27 @@ struct guc_virtual_engine {
};
static struct intel_context *
-guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
+guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
+ unsigned long flags);
+
+static struct intel_context *
+guc_create_parallel(struct intel_engine_cs **engines,
+ unsigned int num_siblings,
+ unsigned int width);
#define GUC_REQUEST_SIZE 64 /* bytes */
/*
+ * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
+ * per the GuC submission interface. A different allocation algorithm is used
+ * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
+ * partition the guc_id space. We believe the number of multi-lrc contexts in
+ * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
+ * multi-lrc.
+ */
+#define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16)
+
+/*
* Below is a set of functions which control the GuC scheduling state which
* require a lock.
*/
@@ -324,6 +341,12 @@ static inline void decr_context_committed_requests(struct intel_context *ce)
GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
}
+static struct intel_context *
+request_to_scheduling_context(struct i915_request *rq)
+{
+ return intel_context_to_parent(rq->context);
+}
+
static inline bool context_guc_id_invalid(struct intel_context *ce)
{
return ce->guc_id.id == GUC_INVALID_LRC_ID;
@@ -344,6 +367,104 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
return rb_entry(rb, struct i915_priolist, node);
}
+/*
+ * When using multi-lrc submission a scratch memory area is reserved in the
+ * parent's context state for the process descriptor, work queue, and handshake
+ * between the parent + children contexts to insert safe preemption points
+ * between each of the BBs. Currently the scratch area is sized to a page.
+ *
+ * The layout of this scratch area is below:
+ * 0 guc_process_desc
+ * + sizeof(struct guc_process_desc) child go
+ * + CACHELINE_BYTES child join[0]
+ * ...
+ * + CACHELINE_BYTES child join[n - 1]
+ * ... unused
+ * PARENT_SCRATCH_SIZE / 2 work queue start
+ * ... work queue
+ * PARENT_SCRATCH_SIZE - 1 work queue end
+ */
+#define WQ_SIZE (PARENT_SCRATCH_SIZE / 2)
+#define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE)
+
+struct sync_semaphore {
+ u32 semaphore;
+ u8 unused[CACHELINE_BYTES - sizeof(u32)];
+};
+
+struct parent_scratch {
+ struct guc_process_desc pdesc;
+
+ struct sync_semaphore go;
+ struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
+
+ u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) -
+ sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
+
+ u32 wq[WQ_SIZE / sizeof(u32)];
+};
+
+static u32 __get_parent_scratch_offset(struct intel_context *ce)
+{
+ GEM_BUG_ON(!ce->parallel.guc.parent_page);
+
+ return ce->parallel.guc.parent_page * PAGE_SIZE;
+}
+
+static u32 __get_wq_offset(struct intel_context *ce)
+{
+ BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
+
+ return __get_parent_scratch_offset(ce) + WQ_OFFSET;
+}
+
+static struct parent_scratch *
+__get_parent_scratch(struct intel_context *ce)
+{
+ BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
+ BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
+
+ /*
+ * Need to subtract LRC_STATE_OFFSET here as the
+ * parallel.guc.parent_page is the offset into ce->state while
+ * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET.
+ */
+ return (struct parent_scratch *)
+ (ce->lrc_reg_state +
+ ((__get_parent_scratch_offset(ce) -
+ LRC_STATE_OFFSET) / sizeof(u32)));
+}
+
+static struct guc_process_desc *
+__get_process_desc(struct intel_context *ce)
+{
+ struct parent_scratch *ps = __get_parent_scratch(ce);
+
+ return &ps->pdesc;
+}
+
+static u32 *get_wq_pointer(struct guc_process_desc *desc,
+ struct intel_context *ce,
+ u32 wqi_size)
+{
+ /*
+ * Check for space in work queue. Caching a value of head pointer in
+ * intel_context structure in order reduce the number accesses to shared
+ * GPU memory which may be across a PCIe bus.
+ */
+#define AVAILABLE_SPACE \
+ CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
+ if (wqi_size > AVAILABLE_SPACE) {
+ ce->parallel.guc.wqi_head = READ_ONCE(desc->head);
+
+ if (wqi_size > AVAILABLE_SPACE)
+ return NULL;
+ }
+#undef AVAILABLE_SPACE
+
+ return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
+}
+
static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index)
{
struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr;
@@ -503,10 +624,10 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
static int guc_lrc_desc_pin(struct intel_context *ce, bool loop);
-static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
+static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
int err = 0;
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
u32 action[3];
int len = 0;
u32 g2h_len_dw = 0;
@@ -527,26 +648,17 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
GEM_BUG_ON(context_guc_id_invalid(ce));
- /*
- * Corner case where the GuC firmware was blown away and reloaded while
- * this context was pinned.
- */
- if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id))) {
- err = guc_lrc_desc_pin(ce, false);
- if (unlikely(err))
- return err;
- }
-
spin_lock(&ce->guc_state.lock);
/*
* The request / context will be run on the hardware when scheduling
- * gets enabled in the unblock.
+ * gets enabled in the unblock. For multi-lrc we still submit the
+ * context to move the LRC tails.
*/
- if (unlikely(context_blocked(ce)))
+ if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
goto out;
- enabled = context_enabled(ce);
+ enabled = context_enabled(ce) || context_blocked(ce);
if (!enabled) {
action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
@@ -565,6 +677,18 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
trace_intel_context_sched_enable(ce);
atomic_inc(&guc->outstanding_submission_g2h);
set_context_enabled(ce);
+
+ /*
+ * Without multi-lrc KMD does the submission step (moving the
+ * lrc tail) so enabling scheduling is sufficient to submit the
+ * context. This isn't the case in multi-lrc submission as the
+ * GuC needs to move the tails, hence the need for another H2G
+ * to submit a multi-lrc context after enabling scheduling.
+ */
+ if (intel_context_is_parent(ce)) {
+ action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
+ err = intel_guc_send_nb(guc, action, len - 1, 0);
+ }
} else if (!enabled) {
clr_context_pending_enable(ce);
intel_context_put(ce);
@@ -577,6 +701,18 @@ out:
return err;
}
+static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
+{
+ int ret = __guc_add_request(guc, rq);
+
+ if (unlikely(ret == -EBUSY)) {
+ guc->stalled_request = rq;
+ guc->submission_stall_reason = STALL_ADD_REQUEST;
+ }
+
+ return ret;
+}
+
static inline void guc_set_lrc_tail(struct i915_request *rq)
{
rq->context->lrc_reg_state[CTX_RING_TAIL] =
@@ -588,6 +724,135 @@ static inline int rq_prio(const struct i915_request *rq)
return rq->sched.attr.priority;
}
+static bool is_multi_lrc_rq(struct i915_request *rq)
+{
+ return intel_context_is_parallel(rq->context);
+}
+
+static bool can_merge_rq(struct i915_request *rq,
+ struct i915_request *last)
+{
+ return request_to_scheduling_context(rq) ==
+ request_to_scheduling_context(last);
+}
+
+static u32 wq_space_until_wrap(struct intel_context *ce)
+{
+ return (WQ_SIZE - ce->parallel.guc.wqi_tail);
+}
+
+static void write_wqi(struct guc_process_desc *desc,
+ struct intel_context *ce,
+ u32 wqi_size)
+{
+ BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
+
+ /*
+ * Ensure WQI are visible before updating tail
+ */
+ intel_guc_write_barrier(ce_to_guc(ce));
+
+ ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
+ (WQ_SIZE - 1);
+ WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail);
+}
+
+static int guc_wq_noop_append(struct intel_context *ce)
+{
+ struct guc_process_desc *desc = __get_process_desc(ce);
+ u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce));
+ u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
+
+ if (!wqi)
+ return -EBUSY;
+
+ GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
+
+ *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
+ FIELD_PREP(WQ_LEN_MASK, len_dw);
+ ce->parallel.guc.wqi_tail = 0;
+
+ return 0;
+}
+
+static int __guc_wq_item_append(struct i915_request *rq)
+{
+ struct intel_context *ce = request_to_scheduling_context(rq);
+ struct intel_context *child;
+ struct guc_process_desc *desc = __get_process_desc(ce);
+ unsigned int wqi_size = (ce->parallel.number_children + 4) *
+ sizeof(u32);
+ u32 *wqi;
+ u32 len_dw = (wqi_size / sizeof(u32)) - 1;
+ int ret;
+
+ /* Ensure context is in correct state updating work queue */
+ GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
+ GEM_BUG_ON(context_guc_id_invalid(ce));
+ GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
+ GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id));
+
+ /* Insert NOOP if this work queue item will wrap the tail pointer. */
+ if (wqi_size > wq_space_until_wrap(ce)) {
+ ret = guc_wq_noop_append(ce);
+ if (ret)
+ return ret;
+ }
+
+ wqi = get_wq_pointer(desc, ce, wqi_size);
+ if (!wqi)
+ return -EBUSY;
+
+ GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
+
+ *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
+ FIELD_PREP(WQ_LEN_MASK, len_dw);
+ *wqi++ = ce->lrc.lrca;
+ *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
+ FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
+ *wqi++ = 0; /* fence_id */
+ for_each_child(ce, child)
+ *wqi++ = child->ring->tail / sizeof(u64);
+
+ write_wqi(desc, ce, wqi_size);
+
+ return 0;
+}
+
+static int guc_wq_item_append(struct intel_guc *guc,
+ struct i915_request *rq)
+{
+ struct intel_context *ce = request_to_scheduling_context(rq);
+ int ret = 0;
+
+ if (likely(!intel_context_is_banned(ce))) {
+ ret = __guc_wq_item_append(rq);
+
+ if (unlikely(ret == -EBUSY)) {
+ guc->stalled_request = rq;
+ guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
+ }
+ }
+
+ return ret;
+}
+
+static bool multi_lrc_submit(struct i915_request *rq)
+{
+ struct intel_context *ce = request_to_scheduling_context(rq);
+
+ intel_ring_set_tail(rq->ring, rq->tail);
+
+ /*
+ * We expect the front end (execbuf IOCTL) to set this flag on the last
+ * request generated from a multi-BB submission. This indicates to the
+ * backend (GuC interface) that we should submit this context thus
+ * submitting all the requests generated in parallel.
+ */
+ return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
+ intel_context_is_banned(ce);
+}
+
static int guc_dequeue_one_context(struct intel_guc *guc)
{
struct i915_sched_engine * const sched_engine = guc->sched_engine;
@@ -601,7 +866,17 @@ static int guc_dequeue_one_context(struct intel_guc *guc)
if (guc->stalled_request) {
submit = true;
last = guc->stalled_request;
- goto resubmit;
+
+ switch (guc->submission_stall_reason) {
+ case STALL_REGISTER_CONTEXT:
+ goto register_context;
+ case STALL_MOVE_LRC_TAIL:
+ goto move_lrc_tail;
+ case STALL_ADD_REQUEST:
+ goto add_request;
+ default:
+ MISSING_CASE(guc->submission_stall_reason);
+ }
}
while ((rb = rb_first_cached(&sched_engine->queue))) {
@@ -609,8 +884,8 @@ static int guc_dequeue_one_context(struct intel_guc *guc)
struct i915_request *rq, *rn;
priolist_for_each_request_consume(rq, rn, p) {
- if (last && rq->context != last->context)
- goto done;
+ if (last && !can_merge_rq(rq, last))
+ goto register_context;
list_del_init(&rq->sched.link);
@@ -618,33 +893,84 @@ static int guc_dequeue_one_context(struct intel_guc *guc)
trace_i915_request_in(rq, 0);
last = rq;
- submit = true;
+
+ if (is_multi_lrc_rq(rq)) {
+ /*
+ * We need to coalesce all multi-lrc requests in
+ * a relationship into a single H2G. We are
+ * guaranteed that all of these requests will be
+ * submitted sequentially.
+ */
+ if (multi_lrc_submit(rq)) {
+ submit = true;
+ goto register_context;
+ }
+ } else {
+ submit = true;
+ }
}
rb_erase_cached(&p->node, &sched_engine->queue);
i915_priolist_free(p);
}
-done:
+
+register_context:
if (submit) {
- guc_set_lrc_tail(last);
-resubmit:
+ struct intel_context *ce = request_to_scheduling_context(last);
+
+ if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) &&
+ !intel_context_is_banned(ce))) {
+ ret = guc_lrc_desc_pin(ce, false);
+ if (unlikely(ret == -EPIPE)) {
+ goto deadlk;
+ } else if (ret == -EBUSY) {
+ guc->stalled_request = last;
+ guc->submission_stall_reason =
+ STALL_REGISTER_CONTEXT;
+ goto schedule_tasklet;
+ } else if (ret != 0) {
+ GEM_WARN_ON(ret); /* Unexpected */
+ goto deadlk;
+ }
+ }
+
+move_lrc_tail:
+ if (is_multi_lrc_rq(last)) {
+ ret = guc_wq_item_append(guc, last);
+ if (ret == -EBUSY) {
+ goto schedule_tasklet;
+ } else if (ret != 0) {
+ GEM_WARN_ON(ret); /* Unexpected */
+ goto deadlk;
+ }
+ } else {
+ guc_set_lrc_tail(last);
+ }
+
+add_request:
ret = guc_add_request(guc, last);
- if (unlikely(ret == -EPIPE))
+ if (unlikely(ret == -EPIPE)) {
+ goto deadlk;
+ } else if (ret == -EBUSY) {
+ goto schedule_tasklet;
+ } else if (ret != 0) {
+ GEM_WARN_ON(ret); /* Unexpected */
goto deadlk;
- else if (ret == -EBUSY) {
- tasklet_schedule(&sched_engine->tasklet);
- guc->stalled_request = last;
- return false;
}
}
guc->stalled_request = NULL;
+ guc->submission_stall_reason = STALL_NONE;
return submit;
deadlk:
sched_engine->tasklet.callback = NULL;
tasklet_disable_nosync(&sched_engine->tasklet);
return false;
+
+schedule_tasklet:
+ tasklet_schedule(&sched_engine->tasklet);
+ return false;
}
static void guc_submission_tasklet(struct tasklet_struct *t)
@@ -719,6 +1045,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
if (deregister)
guc_signal_context_fence(ce);
if (destroyed) {
+ intel_gt_pm_put_async(guc_to_gt(guc));
release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
@@ -797,6 +1124,8 @@ static void guc_flush_submissions(struct intel_guc *guc)
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
+static void guc_flush_destroyed_contexts(struct intel_guc *guc);
+
void intel_guc_submission_reset_prepare(struct intel_guc *guc)
{
int i;
@@ -815,6 +1144,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
spin_unlock_irq(&guc_to_gt(guc)->irq_lock);
guc_flush_submissions(guc);
+ guc_flush_destroyed_contexts(guc);
/*
* Handle any outstanding G2Hs before reset. Call IRQ handler directly
@@ -929,10 +1259,15 @@ __unwind_incomplete_requests(struct intel_context *ce)
static void __guc_reset_context(struct intel_context *ce, bool stalled)
{
+ bool local_stalled;
struct i915_request *rq;
unsigned long flags;
u32 head;
+ int i, number_children = ce->parallel.number_children;
bool skip = false;
+ struct intel_context *parent = ce;
+
+ GEM_BUG_ON(intel_context_is_child(ce));
intel_context_get(ce);
@@ -958,25 +1293,38 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
if (unlikely(skip))
goto out_put;
- rq = intel_context_find_active_request(ce);
- if (!rq) {
- head = ce->ring->tail;
- stalled = false;
- goto out_replay;
- }
+ /*
+ * For each context in the relationship find the hanging request
+ * resetting each context / request as needed
+ */
+ for (i = 0; i < number_children + 1; ++i) {
+ if (!intel_context_is_pinned(ce))
+ goto next_context;
- if (!i915_request_started(rq))
- stalled = false;
+ local_stalled = false;
+ rq = intel_context_find_active_request(ce);
+ if (!rq) {
+ head = ce->ring->tail;
+ goto out_replay;
+ }
- GEM_BUG_ON(i915_active_is_idle(&ce->active));
- head = intel_ring_wrap(ce->ring, rq->head);
- __i915_request_reset(rq, stalled);
+ if (i915_request_started(rq))
+ local_stalled = true;
+ GEM_BUG_ON(i915_active_is_idle(&ce->active));
+ head = intel_ring_wrap(ce->ring, rq->head);
+
+ __i915_request_reset(rq, local_stalled && stalled);
out_replay:
- guc_reset_state(ce, head, stalled);
- __unwind_incomplete_requests(ce);
+ guc_reset_state(ce, head, local_stalled && stalled);
+next_context:
+ if (i != number_children)
+ ce = list_next_entry(ce, parallel.child_link);
+ }
+
+ __unwind_incomplete_requests(parent);
out_put:
- intel_context_put(ce);
+ intel_context_put(parent);
}
void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
@@ -997,7 +1345,8 @@ void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
xa_unlock(&guc->context_lookup);
- if (intel_context_is_pinned(ce))
+ if (intel_context_is_pinned(ce) &&
+ !intel_context_is_child(ce))
__guc_reset_context(ce, stalled);
intel_context_put(ce);
@@ -1089,7 +1438,8 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
xa_unlock(&guc->context_lookup);
- if (intel_context_is_pinned(ce))
+ if (intel_context_is_pinned(ce) &&
+ !intel_context_is_child(ce))
guc_cancel_context_requests(ce);
intel_context_put(ce);
@@ -1126,6 +1476,8 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
intel_gt_unpark_heartbeats(guc_to_gt(guc));
}
+static void destroyed_worker_func(struct work_struct *w);
+
/*
* Set up the memory resources to be shared with the GuC (via the GGTT)
* at firmware loading time.
@@ -1148,9 +1500,17 @@ int intel_guc_submission_init(struct intel_guc *guc)
xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
- spin_lock_init(&guc->contexts_lock);
- INIT_LIST_HEAD(&guc->guc_id_list);
- ida_init(&guc->guc_ids);
+ spin_lock_init(&guc->submission_state.lock);
+ INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
+ ida_init(&guc->submission_state.guc_ids);
+ INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
+ INIT_WORK(&guc->submission_state.destroyed_worker,
+ destroyed_worker_func);
+
+ guc->submission_state.guc_ids_bitmap =
+ bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL);
+ if (!guc->submission_state.guc_ids_bitmap)
+ return -ENOMEM;
return 0;
}
@@ -1160,8 +1520,10 @@ void intel_guc_submission_fini(struct intel_guc *guc)
if (!guc->lrc_desc_pool)
return;
+ guc_flush_destroyed_contexts(guc);
guc_lrc_desc_pool_destroy(guc);
i915_sched_engine_put(guc->sched_engine);
+ bitmap_free(guc->submission_state.guc_ids_bitmap);
}
static inline void queue_request(struct i915_sched_engine *sched_engine,
@@ -1178,16 +1540,22 @@ static inline void queue_request(struct i915_sched_engine *sched_engine,
static int guc_bypass_tasklet_submit(struct intel_guc *guc,
struct i915_request *rq)
{
- int ret;
+ int ret = 0;
__i915_request_submit(rq);
trace_i915_request_in(rq, 0);
- guc_set_lrc_tail(rq);
- ret = guc_add_request(guc, rq);
- if (ret == -EBUSY)
- guc->stalled_request = rq;
+ if (is_multi_lrc_rq(rq)) {
+ if (multi_lrc_submit(rq)) {
+ ret = guc_wq_item_append(guc, rq);
+ if (!ret)
+ ret = guc_add_request(guc, rq);
+ }
+ } else {
+ guc_set_lrc_tail(rq);
+ ret = guc_add_request(guc, rq);
+ }
if (unlikely(ret == -EPIPE))
disable_submission(guc);
@@ -1195,6 +1563,16 @@ static int guc_bypass_tasklet_submit(struct intel_guc *guc,
return ret;
}
+static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
+{
+ struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
+ struct intel_context *ce = request_to_scheduling_context(rq);
+
+ return submission_disabled(guc) || guc->stalled_request ||
+ !i915_sched_engine_is_empty(sched_engine) ||
+ !lrc_desc_registered(guc, ce->guc_id.id);
+}
+
static void guc_submit_request(struct i915_request *rq)
{
struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
@@ -1204,8 +1582,7 @@ static void guc_submit_request(struct i915_request *rq)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&sched_engine->lock, flags);
- if (submission_disabled(guc) || guc->stalled_request ||
- !i915_sched_engine_is_empty(sched_engine))
+ if (need_tasklet(guc, rq))
queue_request(sched_engine, rq, rq_prio(rq));
else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
tasklet_hi_schedule(&sched_engine->tasklet);
@@ -1213,17 +1590,43 @@ static void guc_submit_request(struct i915_request *rq)
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
-static int new_guc_id(struct intel_guc *guc)
+static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
- return ida_simple_get(&guc->guc_ids, 0,
- GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL |
- __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+ int ret;
+
+ GEM_BUG_ON(intel_context_is_child(ce));
+
+ if (intel_context_is_parent(ce))
+ ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+ NUMBER_MULTI_LRC_GUC_ID,
+ order_base_2(ce->parallel.number_children
+ + 1));
+ else
+ ret = ida_simple_get(&guc->submission_state.guc_ids,
+ NUMBER_MULTI_LRC_GUC_ID,
+ GUC_MAX_LRC_DESCRIPTORS,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+ __GFP_NOWARN);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ce->guc_id.id = ret;
+ return 0;
}
static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
+ GEM_BUG_ON(intel_context_is_child(ce));
+
if (!context_guc_id_invalid(ce)) {
- ida_simple_remove(&guc->guc_ids, ce->guc_id.id);
+ if (intel_context_is_parent(ce))
+ bitmap_release_region(guc->submission_state.guc_ids_bitmap,
+ ce->guc_id.id,
+ order_base_2(ce->parallel.number_children
+ + 1));
+ else
+ ida_simple_remove(&guc->submission_state.guc_ids,
+ ce->guc_id.id);
reset_lrc_desc(guc, ce->guc_id.id);
set_context_guc_id_invalid(ce);
}
@@ -1235,54 +1638,69 @@ static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
unsigned long flags;
- spin_lock_irqsave(&guc->contexts_lock, flags);
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
__release_guc_id(guc, ce);
- spin_unlock_irqrestore(&guc->contexts_lock, flags);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
-static int steal_guc_id(struct intel_guc *guc)
+static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
- struct intel_context *ce;
- int guc_id;
+ struct intel_context *cn;
- lockdep_assert_held(&guc->contexts_lock);
+ lockdep_assert_held(&guc->submission_state.lock);
+ GEM_BUG_ON(intel_context_is_child(ce));
+ GEM_BUG_ON(intel_context_is_parent(ce));
- if (!list_empty(&guc->guc_id_list)) {
- ce = list_first_entry(&guc->guc_id_list,
+ if (!list_empty(&guc->submission_state.guc_id_list)) {
+ cn = list_first_entry(&guc->submission_state.guc_id_list,
struct intel_context,
guc_id.link);
- GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
- GEM_BUG_ON(context_guc_id_invalid(ce));
+ GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
+ GEM_BUG_ON(context_guc_id_invalid(cn));
+ GEM_BUG_ON(intel_context_is_child(cn));
+ GEM_BUG_ON(intel_context_is_parent(cn));
- list_del_init(&ce->guc_id.link);
- guc_id = ce->guc_id.id;
+ list_del_init(&cn->guc_id.link);
+ ce->guc_id = cn->guc_id;
spin_lock(&ce->guc_state.lock);
- clr_context_registered(ce);
+ clr_context_registered(cn);
spin_unlock(&ce->guc_state.lock);
- set_context_guc_id_invalid(ce);
- return guc_id;
+ set_context_guc_id_invalid(cn);
+
+ return 0;
} else {
return -EAGAIN;
}
}
-static int assign_guc_id(struct intel_guc *guc, u16 *out)
+static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
int ret;
- lockdep_assert_held(&guc->contexts_lock);
+ lockdep_assert_held(&guc->submission_state.lock);
+ GEM_BUG_ON(intel_context_is_child(ce));
- ret = new_guc_id(guc);
+ ret = new_guc_id(guc, ce);
if (unlikely(ret < 0)) {
- ret = steal_guc_id(guc);
+ if (intel_context_is_parent(ce))
+ return -ENOSPC;
+
+ ret = steal_guc_id(guc, ce);
if (ret < 0)
return ret;
}
- *out = ret;
+ if (intel_context_is_parent(ce)) {
+ struct intel_context *child;
+ int i = 1;
+
+ for_each_child(ce, child)
+ child->guc_id.id = ce->guc_id.id + i++;
+ }
+
return 0;
}
@@ -1295,12 +1713,12 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
try_again:
- spin_lock_irqsave(&guc->contexts_lock, flags);
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
might_lock(&ce->guc_state.lock);
if (context_guc_id_invalid(ce)) {
- ret = assign_guc_id(guc, &ce->guc_id.id);
+ ret = assign_guc_id(guc, ce);
if (ret)
goto out_unlock;
ret = 1; /* Indidcates newly assigned guc_id */
@@ -1310,7 +1728,7 @@ try_again:
atomic_inc(&ce->guc_id.ref);
out_unlock:
- spin_unlock_irqrestore(&guc->contexts_lock, flags);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
/*
* -EAGAIN indicates no guc_id are available, let's retire any
@@ -1342,15 +1760,42 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
unsigned long flags;
GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
+ GEM_BUG_ON(intel_context_is_child(ce));
- if (unlikely(context_guc_id_invalid(ce)))
+ if (unlikely(context_guc_id_invalid(ce) ||
+ intel_context_is_parent(ce)))
return;
- spin_lock_irqsave(&guc->contexts_lock, flags);
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
!atomic_read(&ce->guc_id.ref))
- list_add_tail(&ce->guc_id.link, &guc->guc_id_list);
- spin_unlock_irqrestore(&guc->contexts_lock, flags);
+ list_add_tail(&ce->guc_id.link,
+ &guc->submission_state.guc_id_list);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+}
+
+static int __guc_action_register_multi_lrc(struct intel_guc *guc,
+ struct intel_context *ce,
+ u32 guc_id,
+ u32 offset,
+ bool loop)
+{
+ struct intel_context *child;
+ u32 action[4 + MAX_ENGINE_INSTANCE];
+ int len = 0;
+
+ GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
+
+ action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+ action[len++] = guc_id;
+ action[len++] = ce->parallel.number_children + 1;
+ action[len++] = offset;
+ for_each_child(ce, child) {
+ offset += sizeof(struct guc_lrc_desc);
+ action[len++] = offset;
+ }
+
+ return guc_submission_send_busy_loop(guc, action, len, 0, loop);
}
static int __guc_action_register_context(struct intel_guc *guc,
@@ -1375,9 +1820,15 @@ static int register_context(struct intel_context *ce, bool loop)
ce->guc_id.id * sizeof(struct guc_lrc_desc);
int ret;
+ GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_register(ce);
- ret = __guc_action_register_context(guc, ce->guc_id.id, offset, loop);
+ if (intel_context_is_parent(ce))
+ ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
+ offset, loop);
+ else
+ ret = __guc_action_register_context(guc, ce->guc_id.id, offset,
+ loop);
if (likely(!ret)) {
unsigned long flags;
@@ -1406,26 +1857,31 @@ static int deregister_context(struct intel_context *ce, u32 guc_id)
{
struct intel_guc *guc = ce_to_guc(ce);
+ GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_deregister(ce);
return __guc_action_deregister_context(guc, guc_id);
}
-static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
+static inline void clear_children_join_go_memory(struct intel_context *ce)
{
- switch (class) {
- case RENDER_CLASS:
- return mask >> RCS0;
- case VIDEO_ENHANCEMENT_CLASS:
- return mask >> VECS0;
- case VIDEO_DECODE_CLASS:
- return mask >> VCS0;
- case COPY_ENGINE_CLASS:
- return mask >> BCS0;
- default:
- MISSING_CASE(class);
- return 0;
- }
+ struct parent_scratch *ps = __get_parent_scratch(ce);
+ int i;
+
+ ps->go.semaphore = 0;
+ for (i = 0; i < ce->parallel.number_children + 1; ++i)
+ ps->join[i].semaphore = 0;
+}
+
+static inline u32 get_children_go_value(struct intel_context *ce)
+{
+ return __get_parent_scratch(ce)->go.semaphore;
+}
+
+static inline u32 get_children_join_value(struct intel_context *ce,
+ u8 child_index)
+{
+ return __get_parent_scratch(ce)->join[child_index].semaphore;
}
static void guc_context_policy_init(struct intel_engine_cs *engine,
@@ -1450,6 +1906,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
struct guc_lrc_desc *desc;
bool context_registered;
intel_wakeref_t wakeref;
+ struct intel_context *child;
int ret = 0;
GEM_BUG_ON(!engine->mask);
@@ -1469,14 +1926,50 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
desc = __get_lrc_desc(guc, desc_idx);
desc->engine_class = engine_class_to_guc_class(engine->class);
- desc->engine_submit_mask = adjust_engine_mask(engine->class,
- engine->mask);
+ desc->engine_submit_mask = engine->logical_mask;
desc->hw_context_desc = ce->lrc.lrca;
desc->priority = ce->guc_state.prio;
desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
guc_context_policy_init(engine, desc);
/*
+ * If context is a parent, we need to register a process descriptor
+ * describing a work queue and register all child contexts.
+ */
+ if (intel_context_is_parent(ce)) {
+ struct guc_process_desc *pdesc;
+
+ ce->parallel.guc.wqi_tail = 0;
+ ce->parallel.guc.wqi_head = 0;
+
+ desc->process_desc = i915_ggtt_offset(ce->state) +
+ __get_parent_scratch_offset(ce);
+ desc->wq_addr = i915_ggtt_offset(ce->state) +
+ __get_wq_offset(ce);
+ desc->wq_size = WQ_SIZE;
+
+ pdesc = __get_process_desc(ce);
+ memset(pdesc, 0, sizeof(*(pdesc)));
+ pdesc->stage_id = ce->guc_id.id;
+ pdesc->wq_base_addr = desc->wq_addr;
+ pdesc->wq_size_bytes = desc->wq_size;
+ pdesc->wq_status = WQ_STATUS_ACTIVE;
+
+ for_each_child(ce, child) {
+ desc = __get_lrc_desc(guc, child->guc_id.id);
+
+ desc->engine_class =
+ engine_class_to_guc_class(engine->class);
+ desc->hw_context_desc = child->lrc.lrca;
+ desc->priority = ce->guc_state.prio;
+ desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ guc_context_policy_init(engine, desc);
+ }
+
+ clear_children_join_go_memory(ce);
+ }
+
+ /*
* The context_lookup xarray is used to determine if the hardware
* context is currently registered. There are two cases in which it
* could be registered either the guc_id has been stolen from another
@@ -1559,7 +2052,12 @@ static int guc_context_pre_pin(struct intel_context *ce,
static int guc_context_pin(struct intel_context *ce, void *vaddr)
{
- return __guc_context_pin(ce, ce->engine, vaddr);
+ int ret = __guc_context_pin(ce, ce->engine, vaddr);
+
+ if (likely(!ret && !intel_context_is_barrier(ce)))
+ intel_engine_pm_get(ce->engine);
+
+ return ret;
}
static void guc_context_unpin(struct intel_context *ce)
@@ -1568,6 +2066,9 @@ static void guc_context_unpin(struct intel_context *ce)
unpin_guc_id(guc, ce);
lrc_unpin(ce);
+
+ if (likely(!intel_context_is_barrier(ce)))
+ intel_engine_pm_put_async(ce->engine);
}
static void guc_context_post_unpin(struct intel_context *ce)
@@ -1602,6 +2103,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
+ GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_sched_disable(ce);
guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
@@ -1653,6 +2155,8 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
u16 guc_id;
bool enabled;
+ GEM_BUG_ON(intel_context_is_child(ce));
+
spin_lock_irqsave(&ce->guc_state.lock, flags);
incr_context_blocked(ce);
@@ -1707,6 +2211,7 @@ static void guc_context_unblock(struct intel_context *ce)
bool enable;
GEM_BUG_ON(context_enabled(ce));
+ GEM_BUG_ON(intel_context_is_child(ce));
spin_lock_irqsave(&ce->guc_state.lock, flags);
@@ -1733,11 +2238,14 @@ static void guc_context_unblock(struct intel_context *ce)
static void guc_context_cancel_request(struct intel_context *ce,
struct i915_request *rq)
{
+ struct intel_context *block_context =
+ request_to_scheduling_context(rq);
+
if (i915_sw_fence_signaled(&rq->submit)) {
struct i915_sw_fence *fence;
intel_context_get(ce);
- fence = guc_context_block(ce);
+ fence = guc_context_block(block_context);
i915_sw_fence_wait(fence);
if (!i915_request_completed(rq)) {
__i915_request_skip(rq);
@@ -1751,7 +2259,7 @@ static void guc_context_cancel_request(struct intel_context *ce,
*/
flush_work(&ce_to_guc(ce)->ct.requests.worker);
- guc_context_unblock(ce);
+ guc_context_unblock(block_context);
intel_context_put(ce);
}
}
@@ -1777,6 +2285,8 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
intel_wakeref_t wakeref;
unsigned long flags;
+ GEM_BUG_ON(intel_context_is_child(ce));
+
guc_flush_submissions(guc);
spin_lock_irqsave(&ce->guc_state.lock, flags);
@@ -1827,6 +2337,8 @@ static void guc_context_sched_disable(struct intel_context *ce)
intel_wakeref_t wakeref;
u16 guc_id;
+ GEM_BUG_ON(intel_context_is_child(ce));
+
spin_lock_irqsave(&ce->guc_state.lock, flags);
/*
@@ -1857,11 +2369,30 @@ unpin:
static inline void guc_lrc_desc_unpin(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
+ struct intel_gt *gt = guc_to_gt(guc);
+ unsigned long flags;
+ bool disabled;
+ GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
GEM_BUG_ON(context_enabled(ce));
+ /* Seal race with Reset */
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ disabled = submission_disabled(guc);
+ if (likely(!disabled)) {
+ __intel_gt_pm_get(gt);
+ set_context_destroyed(ce);
+ clr_context_registered(ce);
+ }
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ if (unlikely(disabled)) {
+ release_guc_id(guc, ce);
+ __guc_context_destroy(ce);
+ return;
+ }
+
deregister_context(ce, ce->guc_id.id);
}
@@ -1889,78 +2420,86 @@ static void __guc_context_destroy(struct intel_context *ce)
}
}
+static void guc_flush_destroyed_contexts(struct intel_guc *guc)
+{
+ struct intel_context *ce, *cn;
+ unsigned long flags;
+
+ GEM_BUG_ON(!submission_disabled(guc) &&
+ guc_submission_initialized(guc));
+
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ list_for_each_entry_safe(ce, cn,
+ &guc->submission_state.destroyed_contexts,
+ destroyed_link) {
+ list_del_init(&ce->destroyed_link);
+ __release_guc_id(guc, ce);
+ __guc_context_destroy(ce);
+ }
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+}
+
+static void deregister_destroyed_contexts(struct intel_guc *guc)
+{
+ struct intel_context *ce, *cn;
+ unsigned long flags;
+
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ list_for_each_entry_safe(ce, cn,
+ &guc->submission_state.destroyed_contexts,
+ destroyed_link) {
+ list_del_init(&ce->destroyed_link);
+ guc_lrc_desc_unpin(ce);
+ }
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+}
+
+static void destroyed_worker_func(struct work_struct *w)
+{
+ struct intel_guc *guc = container_of(w, struct intel_guc,
+ submission_state.destroyed_worker);
+ struct intel_gt *gt = guc_to_gt(guc);
+ int tmp;
+
+ with_intel_gt_pm(gt, tmp)
+ deregister_destroyed_contexts(guc);
+}
+
static void guc_context_destroy(struct kref *kref)
{
struct intel_context *ce = container_of(kref, typeof(*ce), ref);
- struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
struct intel_guc *guc = ce_to_guc(ce);
- intel_wakeref_t wakeref;
unsigned long flags;
- bool disabled;
+ bool destroy;
/*
* If the guc_id is invalid this context has been stolen and we can free
* it immediately. Also can be freed immediately if the context is not
* registered with the GuC or the GuC is in the middle of a reset.
*/
- if (context_guc_id_invalid(ce)) {
- __guc_context_destroy(ce);
- return;
- } else if (submission_disabled(guc) ||
- !lrc_desc_registered(guc, ce->guc_id.id)) {
- release_guc_id(guc, ce);
- __guc_context_destroy(ce);
- return;
- }
-
- /*
- * We have to acquire the context spinlock and check guc_id again, if it
- * is valid it hasn't been stolen and needs to be deregistered. We
- * delete this context from the list of unpinned guc_id available to
- * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB
- * returns indicating this context has been deregistered the guc_id is
- * returned to the pool of available guc_id.
- */
- spin_lock_irqsave(&guc->contexts_lock, flags);
- if (context_guc_id_invalid(ce)) {
- spin_unlock_irqrestore(&guc->contexts_lock, flags);
- __guc_context_destroy(ce);
- return;
- }
-
- if (!list_empty(&ce->guc_id.link))
- list_del_init(&ce->guc_id.link);
- spin_unlock_irqrestore(&guc->contexts_lock, flags);
-
- /* Seal race with Reset */
- spin_lock_irqsave(&ce->guc_state.lock, flags);
- disabled = submission_disabled(guc);
- if (likely(!disabled)) {
- set_context_destroyed(ce);
- clr_context_registered(ce);
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
+ !lrc_desc_registered(guc, ce->guc_id.id);
+ if (likely(!destroy)) {
+ if (!list_empty(&ce->guc_id.link))
+ list_del_init(&ce->guc_id.link);
+ list_add_tail(&ce->destroyed_link,
+ &guc->submission_state.destroyed_contexts);
+ } else {
+ __release_guc_id(guc, ce);
}
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- if (unlikely(disabled)) {
- release_guc_id(guc, ce);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+ if (unlikely(destroy)) {
__guc_context_destroy(ce);
return;
}
/*
- * We defer GuC context deregistration until the context is destroyed
- * in order to save on CTBs. With this optimization ideally we only need
- * 1 CTB to register the context during the first pin and 1 CTB to
- * deregister the context when the context is destroyed. Without this
- * optimization, a CTB would be needed every pin & unpin.
- *
- * XXX: Need to acqiure the runtime wakeref as this can be triggered
- * from context_free_worker when runtime wakeref is not held.
- * guc_lrc_desc_unpin requires the runtime as a GuC register is written
- * in H2G CTB to deregister the context. A future patch may defer this
- * H2G CTB if the runtime wakeref is zero.
+ * We use a worker to issue the H2G to deregister the context as we can
+ * take the GT PM for the first time which isn't allowed from an atomic
+ * context.
*/
- with_intel_runtime_pm(runtime_pm, wakeref)
- guc_lrc_desc_unpin(ce);
+ queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
}
static int guc_context_alloc(struct intel_context *ce)
@@ -2056,9 +2595,10 @@ static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
static void add_to_context(struct i915_request *rq)
{
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
+ GEM_BUG_ON(intel_context_is_child(ce));
GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
spin_lock(&ce->guc_state.lock);
@@ -2091,7 +2631,9 @@ static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
static void remove_from_context(struct i915_request *rq)
{
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
+
+ GEM_BUG_ON(intel_context_is_child(ce));
spin_lock_irq(&ce->guc_state.lock);
@@ -2132,6 +2674,7 @@ static const struct intel_context_ops guc_context_ops = {
.destroy = guc_context_destroy,
.create_virtual = guc_create_virtual,
+ .create_parallel = guc_create_parallel,
};
static void submit_work_cb(struct irq_work *wrk)
@@ -2168,6 +2711,8 @@ static void guc_signal_context_fence(struct intel_context *ce)
{
unsigned long flags;
+ GEM_BUG_ON(intel_context_is_child(ce));
+
spin_lock_irqsave(&ce->guc_state.lock, flags);
clr_context_wait_for_deregister_to_register(ce);
__guc_signal_context_fence(ce);
@@ -2198,7 +2743,7 @@ static void guc_context_init(struct intel_context *ce)
static int guc_request_alloc(struct i915_request *rq)
{
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
struct intel_guc *guc = ce_to_guc(ce);
unsigned long flags;
int ret;
@@ -2302,8 +2847,30 @@ static int guc_virtual_context_pre_pin(struct intel_context *ce,
static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
{
struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
+ int ret = __guc_context_pin(ce, engine, vaddr);
+ intel_engine_mask_t tmp, mask = ce->engine->mask;
- return __guc_context_pin(ce, engine, vaddr);
+ if (likely(!ret))
+ for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
+ intel_engine_pm_get(engine);
+
+ return ret;
+}
+
+static void guc_virtual_context_unpin(struct intel_context *ce)
+{
+ intel_engine_mask_t tmp, mask = ce->engine->mask;
+ struct intel_engine_cs *engine;
+ struct intel_guc *guc = ce_to_guc(ce);
+
+ GEM_BUG_ON(context_enabled(ce));
+ GEM_BUG_ON(intel_context_is_barrier(ce));
+
+ unpin_guc_id(guc, ce);
+ lrc_unpin(ce);
+
+ for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
+ intel_engine_pm_put_async(engine);
}
static void guc_virtual_context_enter(struct intel_context *ce)
@@ -2340,7 +2907,98 @@ static const struct intel_context_ops virtual_guc_context_ops = {
.pre_pin = guc_virtual_context_pre_pin,
.pin = guc_virtual_context_pin,
- .unpin = guc_context_unpin,
+ .unpin = guc_virtual_context_unpin,
+ .post_unpin = guc_context_post_unpin,
+
+ .ban = guc_context_ban,
+
+ .cancel_request = guc_context_cancel_request,
+
+ .enter = guc_virtual_context_enter,
+ .exit = guc_virtual_context_exit,
+
+ .sched_disable = guc_context_sched_disable,
+
+ .destroy = guc_context_destroy,
+
+ .get_sibling = guc_virtual_get_sibling,
+};
+
+static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
+{
+ struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
+ struct intel_guc *guc = ce_to_guc(ce);
+ int ret;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+ GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+ ret = pin_guc_id(guc, ce);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return __guc_context_pin(ce, engine, vaddr);
+}
+
+static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
+{
+ struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+ GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+ __intel_context_pin(ce->parallel.parent);
+ return __guc_context_pin(ce, engine, vaddr);
+}
+
+static void guc_parent_context_unpin(struct intel_context *ce)
+{
+ struct intel_guc *guc = ce_to_guc(ce);
+
+ GEM_BUG_ON(context_enabled(ce));
+ GEM_BUG_ON(intel_context_is_barrier(ce));
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+ GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+ if (ce->parallel.last_rq)
+ i915_request_put(ce->parallel.last_rq);
+ unpin_guc_id(guc, ce);
+ lrc_unpin(ce);
+}
+
+static void guc_child_context_unpin(struct intel_context *ce)
+{
+ GEM_BUG_ON(context_enabled(ce));
+ GEM_BUG_ON(intel_context_is_barrier(ce));
+ GEM_BUG_ON(!intel_context_is_child(ce));
+ GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+ lrc_unpin(ce);
+}
+
+static void guc_child_context_post_unpin(struct intel_context *ce)
+{
+ GEM_BUG_ON(!intel_context_is_child(ce));
+ GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
+ GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+ lrc_post_unpin(ce);
+ intel_context_unpin(ce->parallel.parent);
+}
+
+static void guc_child_context_destroy(struct kref *kref)
+{
+ struct intel_context *ce = container_of(kref, typeof(*ce), ref);
+
+ __guc_context_destroy(ce);
+}
+
+static const struct intel_context_ops virtual_parent_context_ops = {
+ .alloc = guc_virtual_context_alloc,
+
+ .pre_pin = guc_context_pre_pin,
+ .pin = guc_parent_context_pin,
+ .unpin = guc_parent_context_unpin,
.post_unpin = guc_context_post_unpin,
.ban = guc_context_ban,
@@ -2357,6 +3015,110 @@ static const struct intel_context_ops virtual_guc_context_ops = {
.get_sibling = guc_virtual_get_sibling,
};
+static const struct intel_context_ops virtual_child_context_ops = {
+ .alloc = guc_virtual_context_alloc,
+
+ .pre_pin = guc_context_pre_pin,
+ .pin = guc_child_context_pin,
+ .unpin = guc_child_context_unpin,
+ .post_unpin = guc_child_context_post_unpin,
+
+ .cancel_request = guc_context_cancel_request,
+
+ .enter = guc_virtual_context_enter,
+ .exit = guc_virtual_context_exit,
+
+ .destroy = guc_child_context_destroy,
+
+ .get_sibling = guc_virtual_get_sibling,
+};
+
+/*
+ * The below override of the breadcrumbs is enabled when the user configures a
+ * context for parallel submission (multi-lrc, parent-child).
+ *
+ * The overridden breadcrumbs implements an algorithm which allows the GuC to
+ * safely preempt all the hw contexts configured for parallel submission
+ * between each BB. The contract between the i915 and GuC is if the parent
+ * context can be preempted, all the children can be preempted, and the GuC will
+ * always try to preempt the parent before the children. A handshake between the
+ * parent / children breadcrumbs ensures the i915 holds up its end of the deal
+ * creating a window to preempt between each set of BBs.
+ */
+static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
+static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
+static u32 *
+emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs);
+static u32 *
+emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs);
+
+static struct intel_context *
+guc_create_parallel(struct intel_engine_cs **engines,
+ unsigned int num_siblings,
+ unsigned int width)
+{
+ struct intel_engine_cs **siblings = NULL;
+ struct intel_context *parent = NULL, *ce, *err;
+ int i, j;
+
+ siblings = kmalloc_array(num_siblings,
+ sizeof(*siblings),
+ GFP_KERNEL);
+ if (!siblings)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < width; ++i) {
+ for (j = 0; j < num_siblings; ++j)
+ siblings[j] = engines[i * num_siblings + j];
+
+ ce = intel_engine_create_virtual(siblings, num_siblings,
+ FORCE_VIRTUAL);
+ if (!ce) {
+ err = ERR_PTR(-ENOMEM);
+ goto unwind;
+ }
+
+ if (i == 0) {
+ parent = ce;
+ parent->ops = &virtual_parent_context_ops;
+ } else {
+ ce->ops = &virtual_child_context_ops;
+ intel_context_bind_parent_child(parent, ce);
+ }
+ }
+
+ parent->parallel.fence_context = dma_fence_context_alloc(1);
+
+ parent->engine->emit_bb_start =
+ emit_bb_start_parent_no_preempt_mid_batch;
+ parent->engine->emit_fini_breadcrumb =
+ emit_fini_breadcrumb_parent_no_preempt_mid_batch;
+ parent->engine->emit_fini_breadcrumb_dw =
+ 12 + 4 * parent->parallel.number_children;
+ for_each_child(parent, ce) {
+ ce->engine->emit_bb_start =
+ emit_bb_start_child_no_preempt_mid_batch;
+ ce->engine->emit_fini_breadcrumb =
+ emit_fini_breadcrumb_child_no_preempt_mid_batch;
+ ce->engine->emit_fini_breadcrumb_dw = 16;
+ }
+
+ kfree(siblings);
+ return parent;
+
+unwind:
+ if (parent)
+ intel_context_put(parent);
+ kfree(siblings);
+ return err;
+}
+
static bool
guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
{
@@ -2416,7 +3178,7 @@ static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
static void guc_bump_inflight_request_prio(struct i915_request *rq,
int prio)
{
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
/* Short circuit function */
@@ -2439,7 +3201,7 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
static void guc_retire_inflight_request_prio(struct i915_request *rq)
{
- struct intel_context *ce = rq->context;
+ struct intel_context *ce = request_to_scheduling_context(rq);
spin_lock(&ce->guc_state.lock);
guc_prio_fini(rq, ce);
@@ -2753,6 +3515,12 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
return NULL;
}
+ if (unlikely(intel_context_is_child(ce))) {
+ drm_err(&guc_to_gt(guc)->i915->drm,
+ "Context is child, desc_idx %u", desc_idx);
+ return NULL;
+ }
+
return ce;
}
@@ -2796,6 +3564,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
intel_context_put(ce);
} else if (context_destroyed(ce)) {
/* Context has been destroyed */
+ intel_gt_pm_put_async(guc_to_gt(guc));
release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
@@ -3122,6 +3891,25 @@ static inline void guc_log_context_priority(struct drm_printer *p,
drm_printf(p, "\n");
}
+static inline void guc_log_context(struct drm_printer *p,
+ struct intel_context *ce)
+{
+ drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
+ drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
+ ce->ring->head,
+ ce->lrc_reg_state[CTX_RING_HEAD]);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
+ ce->ring->tail,
+ ce->lrc_reg_state[CTX_RING_TAIL]);
+ drm_printf(p, "\t\tContext Pin Count: %u\n",
+ atomic_read(&ce->pin_count));
+ drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
+ atomic_read(&ce->guc_id.ref));
+ drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
+ ce->guc_state.sched_state);
+}
+
void intel_guc_submission_print_context_info(struct intel_guc *guc,
struct drm_printer *p)
{
@@ -3131,28 +3919,310 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
- drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
- drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
- drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
- ce->ring->head,
- ce->lrc_reg_state[CTX_RING_HEAD]);
- drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
- ce->ring->tail,
- ce->lrc_reg_state[CTX_RING_TAIL]);
- drm_printf(p, "\t\tContext Pin Count: %u\n",
- atomic_read(&ce->pin_count));
- drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
- atomic_read(&ce->guc_id.ref));
- drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
- ce->guc_state.sched_state);
+ GEM_BUG_ON(intel_context_is_child(ce));
+ guc_log_context(p, ce);
guc_log_context_priority(p, ce);
+
+ if (intel_context_is_parent(ce)) {
+ struct guc_process_desc *desc = __get_process_desc(ce);
+ struct intel_context *child;
+
+ drm_printf(p, "\t\tNumber children: %u\n",
+ ce->parallel.number_children);
+ drm_printf(p, "\t\tWQI Head: %u\n",
+ READ_ONCE(desc->head));
+ drm_printf(p, "\t\tWQI Tail: %u\n",
+ READ_ONCE(desc->tail));
+ drm_printf(p, "\t\tWQI Status: %u\n\n",
+ READ_ONCE(desc->wq_status));
+
+ if (ce->engine->emit_bb_start ==
+ emit_bb_start_parent_no_preempt_mid_batch) {
+ u8 i;
+
+ drm_printf(p, "\t\tChildren Go: %u\n\n",
+ get_children_go_value(ce));
+ for (i = 0; i < ce->parallel.number_children; ++i)
+ drm_printf(p, "\t\tChildren Join: %u\n",
+ get_children_join_value(ce, i));
+ }
+
+ for_each_child(ce, child)
+ guc_log_context(p, child);
+ }
}
xa_unlock_irqrestore(&guc->context_lookup, flags);
}
+static inline u32 get_children_go_addr(struct intel_context *ce)
+{
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ return i915_ggtt_offset(ce->state) +
+ __get_parent_scratch_offset(ce) +
+ offsetof(struct parent_scratch, go.semaphore);
+}
+
+static inline u32 get_children_join_addr(struct intel_context *ce,
+ u8 child_index)
+{
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ return i915_ggtt_offset(ce->state) +
+ __get_parent_scratch_offset(ce) +
+ offsetof(struct parent_scratch, join[child_index].semaphore);
+}
+
+#define PARENT_GO_BB 1
+#define PARENT_GO_FINI_BREADCRUMB 0
+#define CHILD_GO_BB 1
+#define CHILD_GO_FINI_BREADCRUMB 0
+static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
+{
+ struct intel_context *ce = rq->context;
+ u32 *cs;
+ u8 i;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Wait on children */
+ for (i = 0; i < ce->parallel.number_children; ++i) {
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = PARENT_GO_BB;
+ *cs++ = get_children_join_addr(ce, i);
+ *cs++ = 0;
+ }
+
+ /* Turn off preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_NOOP;
+
+ /* Tell children go */
+ cs = gen8_emit_ggtt_write(cs,
+ CHILD_GO_BB,
+ get_children_go_addr(ce),
+ 0);
+
+ /* Jump to batch */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+ (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
+{
+ struct intel_context *ce = rq->context;
+ struct intel_context *parent = intel_context_to_parent(ce);
+ u32 *cs;
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+
+ cs = intel_ring_begin(rq, 12);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Signal parent */
+ cs = gen8_emit_ggtt_write(cs,
+ PARENT_GO_BB,
+ get_children_join_addr(parent,
+ ce->parallel.child_index),
+ 0);
+
+ /* Wait on parent for go */
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = CHILD_GO_BB;
+ *cs++ = get_children_go_addr(parent);
+ *cs++ = 0;
+
+ /* Turn off preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+ /* Jump to batch */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+ (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static u32 *
+__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+ u8 i;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ /* Wait on children */
+ for (i = 0; i < ce->parallel.number_children; ++i) {
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = PARENT_GO_FINI_BREADCRUMB;
+ *cs++ = get_children_join_addr(ce, i);
+ *cs++ = 0;
+ }
+
+ /* Turn on preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ /* Tell children go */
+ cs = gen8_emit_ggtt_write(cs,
+ CHILD_GO_FINI_BREADCRUMB,
+ get_children_go_addr(ce),
+ 0);
+
+ return cs;
+}
+
+/*
+ * If this true, a submission of multi-lrc requests had an error and the
+ * requests need to be skipped. The front end (execuf IOCTL) should've called
+ * i915_request_skip which squashes the BB but we still need to emit the fini
+ * breadrcrumbs seqno write. At this point we don't know how many of the
+ * requests in the multi-lrc submission were generated so we can't do the
+ * handshake between the parent and children (e.g. if 4 requests should be
+ * generated but 2nd hit an error only 1 would be seen by the GuC backend).
+ * Simply skip the handshake, but still emit the breadcrumbd seqno, if an error
+ * has occurred on any of the requests in submission / relationship.
+ */
+static inline bool skip_handshake(struct i915_request *rq)
+{
+ return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
+}
+
+static u32 *
+emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ if (unlikely(skip_handshake(rq))) {
+ /*
+ * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
+ * the -6 comes from the length of the emits below.
+ */
+ memset(cs, 0, sizeof(u32) *
+ (ce->engine->emit_fini_breadcrumb_dw - 6));
+ cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+ } else {
+ cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
+ }
+
+ /* Emit fini breadcrumb */
+ cs = gen8_emit_ggtt_write(cs,
+ rq->fence.seqno,
+ i915_request_active_timeline(rq)->hwsp_offset,
+ 0);
+
+ /* User interrupt */
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+
+ return cs;
+}
+
+static u32 *
+__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+ struct intel_context *parent = intel_context_to_parent(ce);
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+
+ /* Turn on preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ /* Signal parent */
+ cs = gen8_emit_ggtt_write(cs,
+ PARENT_GO_FINI_BREADCRUMB,
+ get_children_join_addr(parent,
+ ce->parallel.child_index),
+ 0);
+
+ /* Wait parent on for go */
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = CHILD_GO_FINI_BREADCRUMB;
+ *cs++ = get_children_go_addr(parent);
+ *cs++ = 0;
+
+ return cs;
+}
+
+static u32 *
+emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+
+ if (unlikely(skip_handshake(rq))) {
+ /*
+ * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
+ * the -6 comes from the length of the emits below.
+ */
+ memset(cs, 0, sizeof(u32) *
+ (ce->engine->emit_fini_breadcrumb_dw - 6));
+ cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+ } else {
+ cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
+ }
+
+ /* Emit fini breadcrumb */
+ cs = gen8_emit_ggtt_write(cs,
+ rq->fence.seqno,
+ i915_request_active_timeline(rq)->hwsp_offset,
+ 0);
+
+ /* User interrupt */
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+
+ return cs;
+}
+
static struct intel_context *
-guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
+guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
+ unsigned long flags)
{
struct guc_virtual_engine *ve;
struct intel_guc *guc;
@@ -3201,6 +4271,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
}
ve->base.mask |= sibling->mask;
+ ve->base.logical_mask |= sibling->logical_mask;
if (n != 0 && ve->base.class != sibling->class) {
DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
@@ -3259,4 +4330,5 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_guc.c"
+#include "selftest_guc_multi_lrc.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
new file mode 100644
index 000000000000..50953c8e8b53
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright �� 2019 Intel Corporation
+ */
+
+#include "selftests/igt_spinner.h"
+#include "selftests/igt_reset.h"
+#include "selftests/intel_scheduler_helpers.h"
+#include "gt/intel_engine_heartbeat.h"
+#include "gem/selftests/mock_context.h"
+
+static void logical_sort(struct intel_engine_cs **engines, int num_engines)
+{
+ struct intel_engine_cs *sorted[MAX_ENGINE_INSTANCE + 1];
+ int i, j;
+
+ for (i = 0; i < num_engines; ++i)
+ for (j = 0; j < MAX_ENGINE_INSTANCE + 1; ++j) {
+ if (engines[j]->logical_mask & BIT(i)) {
+ sorted[i] = engines[j];
+ break;
+ }
+ }
+
+ memcpy(*engines, *sorted,
+ sizeof(struct intel_engine_cs *) * num_engines);
+}
+
+static struct intel_context *
+multi_lrc_create_parent(struct intel_gt *gt, u8 class,
+ unsigned long flags)
+{
+ struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int i = 0;
+
+ for_each_engine(engine, gt, id) {
+ if (engine->class != class)
+ continue;
+
+ siblings[i++] = engine;
+ }
+
+ if (i <= 1)
+ return ERR_PTR(0);
+
+ logical_sort(siblings, i);
+
+ return intel_engine_create_parallel(siblings, 1, i);
+}
+
+static void multi_lrc_context_unpin(struct intel_context *ce)
+{
+ struct intel_context *child;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ for_each_child(ce, child)
+ intel_context_unpin(child);
+ intel_context_unpin(ce);
+}
+
+static void multi_lrc_context_put(struct intel_context *ce)
+{
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ /*
+ * Only the parent gets the creation ref put in the uAPI, the parent
+ * itself is responsible for creation ref put on the children.
+ */
+ intel_context_put(ce);
+}
+
+static struct i915_request *
+multi_lrc_nop_request(struct intel_context *ce)
+{
+ struct intel_context *child;
+ struct i915_request *rq, *child_rq;
+ int i = 0;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ for_each_child(ce, child) {
+ child_rq = intel_context_create_request(child);
+ if (IS_ERR(child_rq))
+ goto child_error;
+
+ if (++i == ce->parallel.number_children)
+ set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL,
+ &child_rq->fence.flags);
+ i915_request_add(child_rq);
+ }
+
+ return rq;
+
+child_error:
+ i915_request_put(rq);
+
+ return ERR_PTR(-ENOMEM);
+}
+
+static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class)
+{
+ struct intel_context *parent;
+ struct i915_request *rq;
+ int ret;
+
+ parent = multi_lrc_create_parent(gt, class, 0);
+ if (IS_ERR(parent)) {
+ pr_err("Failed creating contexts: %ld", PTR_ERR(parent));
+ return PTR_ERR(parent);
+ } else if (!parent) {
+ pr_debug("Not enough engines in class: %d", class);
+ return 0;
+ }
+
+ rq = multi_lrc_nop_request(parent);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ pr_err("Failed creating requests: %d", ret);
+ goto out;
+ }
+
+ ret = intel_selftest_wait_for_rq(rq);
+ if (ret)
+ pr_err("Failed waiting on request: %d", ret);
+
+ i915_request_put(rq);
+
+ if (ret >= 0) {
+ ret = intel_gt_wait_for_idle(gt, HZ * 5);
+ if (ret < 0)
+ pr_err("GT failed to idle: %d\n", ret);
+ }
+
+out:
+ multi_lrc_context_unpin(parent);
+ multi_lrc_context_put(parent);
+ return ret;
+}
+
+static int intel_guc_multi_lrc_basic(void *arg)
+{
+ struct intel_gt *gt = arg;
+ unsigned int class;
+ int ret;
+
+ for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
+ ret = __intel_guc_multi_lrc_basic(gt, class);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int intel_guc_multi_lrc_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(intel_guc_multi_lrc_basic),
+ };
+ struct intel_gt *gt = &i915->gt;
+
+ if (intel_gt_is_wedged(gt))
+ return 0;
+
+ if (!intel_uc_uses_guc_submission(&gt->uc))
+ return 0;
+
+ return intel_gt_live_subtests(tests, gt);
+}