summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c167
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.h21
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c36
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_regs.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c153
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h79
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_defines.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h26
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c108
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c112
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c85
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sa_media.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.c159
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.h29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c568
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c5
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_cs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c6
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_migrate.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_mocs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_tlb.c3
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.c3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h75
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c341
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c141
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c39
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c20
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c75
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h15
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c85
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c47
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c16
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c163
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c6
74 files changed, 1858 insertions, 1022 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
index 1c82caf525c3..8fe0499308ff 100644
--- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
@@ -76,7 +76,7 @@ int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
cmd = MI_FLUSH;
if (mode & EMIT_INVALIDATE) {
cmd |= MI_EXE_FLUSH;
- if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
+ if (IS_G4X(rq->i915) || GRAPHICS_VER(rq->i915) == 5)
cmd |= MI_INVALIDATE_ISP;
}
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 23857cc08eca..ba4c2422b340 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -4,9 +4,9 @@
*/
#include "gen8_engine_cs.h"
-#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
+#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_ring.h"
@@ -39,11 +39,11 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
* On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
* pipe control.
*/
- if (GRAPHICS_VER(rq->engine->i915) == 9)
+ if (GRAPHICS_VER(rq->i915) == 9)
vf_flush_wa = true;
/* WaForGAMHang:kbl */
- if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0))
+ if (IS_KABYLAKE(rq->i915) && IS_GRAPHICS_STEP(rq->i915, 0, STEP_C0))
dc_flush_wa = true;
}
@@ -165,14 +165,60 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
+static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine)
{
- u32 gsi_offset = gt->uncore->gsi_offset;
+ switch (engine->id) {
+ case RCS0:
+ return GEN12_CCS_AUX_INV;
+ case BCS0:
+ return GEN12_BCS0_AUX_INV;
+ case VCS0:
+ return GEN12_VD0_AUX_INV;
+ case VCS2:
+ return GEN12_VD2_AUX_INV;
+ case VECS0:
+ return GEN12_VE0_AUX_INV;
+ case CCS0:
+ return GEN12_CCS0_AUX_INV;
+ default:
+ return INVALID_MMIO_REG;
+ }
+}
+
+static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
+{
+ i915_reg_t reg = gen12_get_aux_inv_reg(engine);
+
+ if (IS_PONTEVECCHIO(engine->i915))
+ return false;
+
+ /*
+ * So far platforms supported by i915 having flat ccs do not require
+ * AUX invalidation. Check also whether the engine requires it.
+ */
+ return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915);
+}
+
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
+{
+ i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine);
+ u32 gsi_offset = engine->gt->uncore->gsi_offset;
+
+ if (!gen12_needs_ccs_aux_inv(engine))
+ return cs;
*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
*cs++ = AUX_INV;
- *cs++ = MI_NOOP;
+
+ *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+ MI_SEMAPHORE_REGISTER_POLL |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
+ *cs++ = 0;
+ *cs++ = 0;
return cs;
}
@@ -180,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
static int mtl_dummy_pipe_control(struct i915_request *rq)
{
/* Wa_14016712196 */
- if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) ||
+ IS_DG2(rq->i915)) {
u32 *cs;
/* dummy PIPE_CONTROL + depth flush */
@@ -202,8 +248,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
struct intel_engine_cs *engine = rq->engine;
- if (mode & EMIT_FLUSH) {
- u32 flags = 0;
+ /*
+ * On Aux CCS platforms the invalidation of the Aux
+ * table requires quiescing memory traffic beforehand
+ */
+ if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
+ u32 bit_group_0 = 0;
+ u32 bit_group_1 = 0;
int err;
u32 *cs;
@@ -211,32 +262,49 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
if (err)
return err;
- flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
- flags |= PIPE_CONTROL_FLUSH_L3;
- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+
+ /*
+ * When required, in MTL and beyond platforms we
+ * need to set the CCS_FLUSH bit in the pipe control
+ */
+ if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
+ bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
+
+ /*
+ * L3 fabric flush is needed for AUX CCS invalidation
+ * which happens as part of pipe-control so we can
+ * ignore PIPE_CONTROL_FLUSH_L3. Also PIPE_CONTROL_FLUSH_L3
+ * deals with Protected Memory which is not needed for
+ * AUX CCS invalidation and lead to unwanted side effects.
+ */
+ if (mode & EMIT_FLUSH)
+ bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+
+ bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/* Wa_1409600907:tgl,adl-p */
- flags |= PIPE_CONTROL_DEPTH_STALL;
- flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
+ bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_STORE_DATA_INDEX;
- flags |= PIPE_CONTROL_QW_WRITE;
+ bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX;
+ bit_group_1 |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_CS_STALL;
+ bit_group_1 |= PIPE_CONTROL_CS_STALL;
if (!HAS_3D_PIPELINE(engine->i915))
- flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+ bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
else if (engine->class == COMPUTE_CLASS)
- flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+ bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen12_emit_pipe_control(cs,
- PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
- flags, LRC_PPHWSP_SCRATCH_ADDR);
+ cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1,
+ LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(rq, cs);
}
@@ -267,10 +335,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
else if (engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
- if (!HAS_FLAT_CCS(rq->engine->i915))
- count = 8 + 4;
- else
- count = 8;
+ count = 8;
+ if (gen12_needs_ccs_aux_inv(rq->engine))
+ count += 8;
cs = intel_ring_begin(rq, count);
if (IS_ERR(cs))
@@ -285,11 +352,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
- if (!HAS_FLAT_CCS(rq->engine->i915)) {
- /* hsdes: 1809175790 */
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_GFX_CCS_AUX_NV);
- }
+ cs = gen12_emit_aux_table_inv(engine, cs);
*cs++ = preparser_disable(false);
intel_ring_advance(rq, cs);
@@ -300,21 +363,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
- intel_engine_mask_t aux_inv = 0;
- u32 cmd, *cs;
+ u32 cmd = 4;
+ u32 *cs;
- cmd = 4;
if (mode & EMIT_INVALIDATE) {
cmd += 2;
- if (!HAS_FLAT_CCS(rq->engine->i915) &&
- (rq->engine->class == VIDEO_DECODE_CLASS ||
- rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
- aux_inv = rq->engine->mask &
- ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
- if (aux_inv)
- cmd += 4;
- }
+ if (gen12_needs_ccs_aux_inv(rq->engine))
+ cmd += 8;
}
cs = intel_ring_begin(rq, cmd);
@@ -338,6 +394,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
cmd |= MI_INVALIDATE_TLB;
if (rq->engine->class == VIDEO_DECODE_CLASS)
cmd |= MI_INVALIDATE_BSD;
+
+ if (gen12_needs_ccs_aux_inv(rq->engine) &&
+ rq->engine->class == COPY_ENGINE_CLASS)
+ cmd |= MI_FLUSH_DW_CCS;
}
*cs++ = cmd;
@@ -345,14 +405,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
- if (aux_inv) { /* hsdes: 1809175790 */
- if (rq->engine->class == VIDEO_DECODE_CLASS)
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VD0_AUX_NV);
- else
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VE0_AUX_NV);
- }
+ cs = gen12_emit_aux_table_inv(rq->engine, cs);
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(false);
@@ -754,7 +807,8 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
- struct drm_i915_private *i915 = rq->engine->i915;
+ struct drm_i915_private *i915 = rq->i915;
+ struct intel_gt *gt = rq->engine->gt;
u32 flags = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
@@ -765,8 +819,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_FLUSH_ENABLE);
/* Wa_14016712196 */
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
/* dummy PIPE_CONTROL + depth flush */
cs = gen12_emit_pipe_control(cs, 0,
PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
@@ -775,7 +828,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
/* Wa_1409600907 */
flags |= PIPE_CONTROL_DEPTH_STALL;
- if (!HAS_3D_PIPELINE(rq->engine->i915))
+ if (!HAS_3D_PIPELINE(rq->i915))
flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
else if (rq->engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
index 655e5c00ddc2..867ba697aceb 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
@@ -13,6 +13,7 @@
#include "intel_gt_regs.h"
#include "intel_gpu_commands.h"
+struct intel_engine_cs;
struct intel_gt;
struct i915_request;
@@ -46,28 +47,32 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg);
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs);
static inline u32 *
-__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+__gen8_emit_pipe_control(u32 *batch, u32 bit_group_0,
+ u32 bit_group_1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
- batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
- batch[1] = flags1;
+ batch[0] = GFX_OP_PIPE_CONTROL(6) | bit_group_0;
+ batch[1] = bit_group_1;
batch[2] = offset;
return batch + 6;
}
-static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+static inline u32 *gen8_emit_pipe_control(u32 *batch,
+ u32 bit_group_1, u32 offset)
{
- return __gen8_emit_pipe_control(batch, 0, flags, offset);
+ return __gen8_emit_pipe_control(batch, 0, bit_group_1, offset);
}
-static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 bit_group_0,
+ u32 bit_group_1, u32 offset)
{
- return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
+ return __gen8_emit_pipe_control(batch, bit_group_0,
+ bit_group_1, offset);
}
static inline u32 *
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index c8568e5d1147..9895e18df043 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -242,9 +242,9 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
len = gen8_pd_range(start, end, lvl--, &idx);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
- __func__, vm, lvl + 1, start, end,
- idx, len, atomic_read(px_used(pd)));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, start, end,
+ idx, len, atomic_read(px_used(pd)));
GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
do {
@@ -252,8 +252,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
gen8_pd_contains(start, end, lvl)) {
- DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
- __func__, vm, lvl + 1, idx, start, end);
+ GTT_TRACE("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
+ __func__, vm, lvl + 1, idx, start, end);
clear_pd_entry(pd, idx, scratch);
__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
start += (u64)I915_PDES << gen8_pd_shift(lvl);
@@ -270,10 +270,10 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
u64 *vaddr;
count = gen8_pt_count(start, end);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
- __func__, vm, lvl, start, end,
- gen8_pd_index(start, 0), count,
- atomic_read(&pt->used));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
+ __func__, vm, lvl, start, end,
+ gen8_pd_index(start, 0), count,
+ atomic_read(&pt->used));
GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
num_ptes = count;
@@ -325,9 +325,9 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
len = gen8_pd_range(*start, end, lvl--, &idx);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
- __func__, vm, lvl + 1, *start, end,
- idx, len, atomic_read(px_used(pd)));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, *start, end,
+ idx, len, atomic_read(px_used(pd)));
GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
spin_lock(&pd->lock);
@@ -338,8 +338,8 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
if (!pt) {
spin_unlock(&pd->lock);
- DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
- __func__, vm, lvl + 1, idx);
+ GTT_TRACE("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
+ __func__, vm, lvl + 1, idx);
pt = stash->pt[!!lvl];
__i915_gem_object_pin_pages(pt->base);
@@ -369,10 +369,10 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
} else {
unsigned int count = gen8_pt_count(*start, end);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
- __func__, vm, lvl, *start, end,
- gen8_pd_index(*start, 0), count,
- atomic_read(&pt->used));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
+ __func__, vm, lvl, *start, end,
+ gen8_pd_index(*start, 0), count,
+ atomic_read(&pt->used));
atomic_add(count, &pt->used);
/* All other pdes may be simultaneously removed */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 0aff5bb13c53..84a75c95f3f7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -558,7 +558,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
DRIVER_CAPS(i915)->has_logical_contexts = true;
ewma__engine_latency_init(&engine->latency);
- seqcount_init(&engine->stats.execlists.lock);
ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
@@ -1333,6 +1332,7 @@ static int measure_breadcrumb_dw(struct intel_context *ce)
if (!frame)
return -ENOMEM;
+ frame->rq.i915 = engine->i915;
frame->rq.engine = engine;
frame->rq.context = ce;
rcu_assign_pointer(frame->rq.timeline, ce->timeline);
@@ -1616,9 +1616,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
* Wa_22011802037: Prior to doing a reset, ensure CS is
* stopped, set ring stop bit and prefetch disable bit to halt CS
*/
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt))
intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 21af0ec52223..e91fc881dbf1 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
- if (IS_METEORLAKE(i915) && engine->id == GSC0) {
+ if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) {
intel_uncore_write(engine->gt->uncore,
RC_PSMI_CTRL_GSCCS,
_MASKED_BIT_DISABLE(IDLE_MSG_DISABLE));
@@ -39,7 +39,7 @@ static void dbg_poison_ce(struct intel_context *ce)
if (ce->state) {
struct drm_i915_gem_object *obj = ce->state->obj;
- int type = i915_coherent_map_type(ce->engine->i915, obj, true);
+ int type = intel_gt_coherent_map_type(ce->engine->gt, obj, true);
void *map;
if (!i915_gem_object_trylock(obj, NULL))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index 6b9d9f837669..fdd4ddd3a978 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -177,6 +177,7 @@
#define CTX_CTRL_RS_CTX_ENABLE REG_BIT(1)
#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT REG_BIT(2)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
+#define GEN12_CTX_CTRL_RUNALONE_MODE REG_BIT(7)
#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE REG_BIT(8)
#define RING_CTX_SR_CTL(base) _MMIO((base) + 0x244)
#define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index e99a6fa03d45..a7e677598004 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -58,6 +58,7 @@ struct i915_perf_group;
typedef u32 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)
+#define VIRTUAL_ENGINES BIT(BITS_PER_TYPE(intel_engine_mask_t) - 1)
struct intel_hw_status_page {
struct list_head timelines;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 2ebd937f3b4c..e8f42ec6b1b4 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2718,7 +2718,7 @@ static int emit_pdps(struct i915_request *rq)
int err, i;
u32 *cs;
- GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
+ GEM_BUG_ON(intel_vgpu_active(rq->i915));
/*
* Beware ye of the dragons, this sequence is magic!
@@ -3001,9 +3001,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
* Wa_22011802037: In addition to stopping the cs, we need
* to wait for any pending mi force wakeups
*/
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt))
intel_engine_wait_for_pending_mi_fw(engine);
engine->execlists.reset_ccid = active_ccid(engine);
@@ -3550,22 +3548,24 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);
+ seqcount_init(&engine->stats.execlists.lock);
+
if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
rcs_submission_override(engine);
lrc_init_wa_ctx(engine);
if (HAS_LOGICAL_RING_ELSQ(i915)) {
- execlists->submit_reg = uncore->regs +
+ execlists->submit_reg = intel_uncore_regs(uncore) +
i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
- execlists->ctrl_reg = uncore->regs +
+ execlists->ctrl_reg = intel_uncore_regs(uncore) +
i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
engine->fw_domain = intel_uncore_forcewake_for_reg(engine->uncore,
RING_EXECLIST_CONTROL(engine->mmio_base),
FW_REG_WRITE);
} else {
- execlists->submit_reg = uncore->regs +
+ execlists->submit_reg = intel_uncore_regs(uncore) +
i915_mmio_reg_offset(RING_ELSP(base));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index dd0ed941441a..da21f2786b5d 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm,
vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}
+/*
+ * Reserve the top of the GuC address space for firmware images. Addresses
+ * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
+ * which makes for a suitable range to hold GuC/HuC firmware images if the
+ * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
+ * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
+ * of the same size anyway, which is far more than needed, to keep the logic
+ * in uc_fw_ggtt_offset() simple.
+ */
+#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)
+
static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
- u64 size;
+ u64 offset;
int ret;
if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
return 0;
- GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
- size = ggtt->vm.total - GUC_GGTT_TOP;
+ GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
+ offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;
- ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size,
- GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
- PIN_NOEVICT);
+ ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
+ GUC_TOP_RESERVE_SIZE, offset,
+ I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
if (ret)
drm_dbg(&ggtt->vm.i915->drm,
"Failed to reserve top of GGTT for GuC\n");
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index 37d0b0fe791d..40371b8a9bbb 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -818,7 +818,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
if (obj->bit_17 == NULL) {
obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL);
if (obj->bit_17 == NULL) {
- drm_err(&to_i915(obj->base.dev)->drm,
+ drm_err(obj->base.dev,
"Failed to allocate memory for bit 17 record\n");
return;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 5d143e2a8db0..2bd8d98d2110 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -121,6 +121,7 @@
#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
+#define MI_SEMAPHORE_REGISTER_POLL (1 << 16)
#define MI_SEMAPHORE_POLL (1 << 15)
#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
@@ -299,6 +300,7 @@
#define PIPE_CONTROL_QW_WRITE (1<<14)
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
+#define PIPE_CONTROL_CCS_FLUSH (1<<13) /* MTL+ */
#define PIPE_CONTROL_WRITE_FLUSH (1<<12)
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 7a008e829d4d..93062c35e072 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -33,6 +33,7 @@
#include "intel_rps.h"
#include "intel_sa_media.h"
#include "intel_gt_sysfs.h"
+#include "intel_tlb.h"
#include "intel_uncore.h"
#include "shmem_utils.h"
@@ -50,8 +51,7 @@ void intel_gt_common_init_early(struct intel_gt *gt)
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
intel_gt_init_timelines(gt);
- mutex_init(&gt->tlb.invalidate_lock);
- seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
+ intel_gt_init_tlb(gt);
intel_gt_pm_init_early(gt);
intel_wopcm_init_early(&gt->wopcm);
@@ -179,7 +179,7 @@ int intel_gt_init_hw(struct intel_gt *gt)
if (IS_HASWELL(i915))
intel_uncore_write(uncore,
HSW_MI_PREDICATE_RESULT_2,
- IS_HSW_GT3(i915) ?
+ IS_HASWELL_GT3(i915) ?
LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
/* Apply the GT workarounds... */
@@ -466,7 +466,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
obj = i915_gem_object_create_lmem(i915, size,
I915_BO_ALLOC_VOLATILE |
I915_BO_ALLOC_GPU_ONLY);
- if (IS_ERR(obj))
+ if (IS_ERR(obj) && !IS_METEORLAKE(i915)) /* Wa_22018444074 */
obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(obj))
obj = i915_gem_object_create_internal(i915, size);
@@ -846,7 +846,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
intel_gt_fini_requests(gt);
intel_gt_fini_reset(gt);
intel_gt_fini_timelines(gt);
- mutex_destroy(&gt->tlb.invalidate_lock);
+ intel_gt_fini_tlb(gt);
intel_engines_free(gt);
}
}
@@ -887,7 +887,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
int intel_gt_probe_all(struct drm_i915_private *i915)
{
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
- struct intel_gt *gt = &i915->gt0;
+ struct intel_gt *gt = to_gt(i915);
const struct intel_gt_definition *gtdef;
phys_addr_t phys_addr;
unsigned int mmio_bar;
@@ -904,7 +904,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
*/
gt->i915 = i915;
gt->name = "Primary GT";
- gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
+ gt->info.engine_mask = INTEL_INFO(i915)->platform_engine_mask;
gt_dbg(gt, "Setting up %s\n", gt->name);
ret = intel_gt_tile_setup(gt, phys_addr);
@@ -1004,136 +1004,23 @@ void intel_gt_info_print(const struct intel_gt_info *info,
intel_sseu_dump(&info->sseu, p);
}
-/*
- * HW architecture suggest typical invalidation time at 40us,
- * with pessimistic cases up to 100us and a recommendation to
- * cap at 1ms. We go a bit higher just in case.
- */
-#define TLB_INVAL_TIMEOUT_US 100
-#define TLB_INVAL_TIMEOUT_MS 4
-
-/*
- * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
- * but are now considered MCR registers. Since they exist within a GAM range,
- * the primary instance of the register rolls up the status from each unit.
- */
-static int wait_for_invalidate(struct intel_engine_cs *engine)
-{
- if (engine->tlb_inv.mcr)
- return intel_gt_mcr_wait_for_reg(engine->gt,
- engine->tlb_inv.reg.mcr_reg,
- engine->tlb_inv.done,
- 0,
- TLB_INVAL_TIMEOUT_US,
- TLB_INVAL_TIMEOUT_MS);
- else
- return __intel_wait_for_register_fw(engine->gt->uncore,
- engine->tlb_inv.reg.reg,
- engine->tlb_inv.done,
- 0,
- TLB_INVAL_TIMEOUT_US,
- TLB_INVAL_TIMEOUT_MS,
- NULL);
-}
-
-static void mmio_invalidate_full(struct intel_gt *gt)
+enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
+ struct drm_i915_gem_object *obj,
+ bool always_coherent)
{
- struct drm_i915_private *i915 = gt->i915;
- struct intel_uncore *uncore = gt->uncore;
- struct intel_engine_cs *engine;
- intel_engine_mask_t awake, tmp;
- enum intel_engine_id id;
- unsigned long flags;
-
- if (GRAPHICS_VER(i915) < 8)
- return;
-
- intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
-
- intel_gt_mcr_lock(gt, &flags);
- spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
-
- awake = 0;
- for_each_engine(engine, gt, id) {
- if (!intel_engine_pm_is_awake(engine))
- continue;
-
- if (engine->tlb_inv.mcr)
- intel_gt_mcr_multicast_write_fw(gt,
- engine->tlb_inv.reg.mcr_reg,
- engine->tlb_inv.request);
- else
- intel_uncore_write_fw(uncore,
- engine->tlb_inv.reg.reg,
- engine->tlb_inv.request);
-
- awake |= engine->mask;
- }
-
- GT_TRACE(gt, "invalidated engines %08x\n", awake);
-
- /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
- if (awake &&
- (IS_TIGERLAKE(i915) ||
- IS_DG1(i915) ||
- IS_ROCKETLAKE(i915) ||
- IS_ALDERLAKE_S(i915) ||
- IS_ALDERLAKE_P(i915)))
- intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
-
- spin_unlock(&uncore->lock);
- intel_gt_mcr_unlock(gt, flags);
-
- for_each_engine_masked(engine, gt, awake, tmp) {
- if (wait_for_invalidate(engine))
- gt_err_ratelimited(gt,
- "%s TLB invalidation did not complete in %ums!\n",
- engine->name, TLB_INVAL_TIMEOUT_MS);
- }
-
/*
- * Use delayed put since a) we mostly expect a flurry of TLB
- * invalidations so it is good to avoid paying the forcewake cost and
- * b) it works around a bug in Icelake which cannot cope with too rapid
- * transitions.
+ * Wa_22016122933: always return I915_MAP_WC for Media
+ * version 13.0 when the object is on the Media GT
*/
- intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+ if (i915_gem_object_is_lmem(obj) || intel_gt_needs_wa_22016122933(gt))
+ return I915_MAP_WC;
+ if (HAS_LLC(gt->i915) || always_coherent)
+ return I915_MAP_WB;
+ else
+ return I915_MAP_WC;
}
-static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
+bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
{
- u32 cur = intel_gt_tlb_seqno(gt);
-
- /* Only skip if a *full* TLB invalidate barrier has passed */
- return (s32)(cur - ALIGN(seqno, 2)) > 0;
+ return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
}
-
-void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
-{
- intel_wakeref_t wakeref;
-
- if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
- return;
-
- if (intel_gt_is_wedged(gt))
- return;
-
- if (tlb_seqno_passed(gt, seqno))
- return;
-
- with_intel_gt_pm_if_awake(gt, wakeref) {
- mutex_lock(&gt->tlb.invalidate_lock);
- if (tlb_seqno_passed(gt, seqno))
- goto unlock;
-
- mmio_invalidate_full(gt);
-
- write_seqcount_invalidate(&gt->tlb.seqno);
-unlock:
- mutex_unlock(&gt->tlb.invalidate_lock);
- }
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_tlb.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index d2f4fbde5f9f..2cac499d5aa3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -13,6 +13,69 @@
struct drm_i915_private;
struct drm_printer;
+/*
+ * Check that the GT is a graphics GT and has an IP version within the
+ * specified range (inclusive).
+ */
+#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \
+ BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \
+ BUILD_BUG_ON_ZERO((until) < (from)) + \
+ ((gt)->type != GT_MEDIA && \
+ GRAPHICS_VER_FULL((gt)->i915) >= (from) && \
+ GRAPHICS_VER_FULL((gt)->i915) <= (until)))
+
+/*
+ * Check that the GT is a media GT and has an IP version within the
+ * specified range (inclusive).
+ *
+ * Only usable on platforms with a standalone media design (i.e., IP version 13
+ * and higher).
+ */
+#define IS_MEDIA_GT_IP_RANGE(gt, from, until) ( \
+ BUILD_BUG_ON_ZERO((from) < IP_VER(13, 0)) + \
+ BUILD_BUG_ON_ZERO((until) < (from)) + \
+ ((gt) && (gt)->type == GT_MEDIA && \
+ MEDIA_VER_FULL((gt)->i915) >= (from) && \
+ MEDIA_VER_FULL((gt)->i915) <= (until)))
+
+/*
+ * Check that the GT is a graphics GT with a specific IP version and has
+ * a stepping in the range [from, until). The lower stepping bound is
+ * inclusive, the upper bound is exclusive. The most common use-case of this
+ * macro is for checking bounds for workarounds, which usually have a stepping
+ * ("from") at which the hardware issue is first present and another stepping
+ * ("until") at which a hardware fix is present and the software workaround is
+ * no longer necessary. E.g.,
+ *
+ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)
+ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER)
+ *
+ * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper
+ * stepping bound for the specified IP version.
+ */
+#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \
+ BUILD_BUG_ON_ZERO((until) <= (from)) + \
+ (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \
+ IS_GRAPHICS_STEP((gt)->i915, (from), (until))))
+
+/*
+ * Check that the GT is a media GT with a specific IP version and has
+ * a stepping in the range [from, until). The lower stepping bound is
+ * inclusive, the upper bound is exclusive. The most common use-case of this
+ * macro is for checking bounds for workarounds, which usually have a stepping
+ * ("from") at which the hardware issue is first present and another stepping
+ * ("until") at which a hardware fix is present and the software workaround is
+ * no longer necessary. "STEP_FOREVER" can be passed as "until" for
+ * workarounds that have no upper stepping bound for the specified IP version.
+ *
+ * This macro may only be used to match on platforms that have a standalone
+ * media design (i.e., media version 13 or higher).
+ */
+#define IS_MEDIA_GT_IP_STEP(gt, ipver, from, until) ( \
+ BUILD_BUG_ON_ZERO((until) <= (from)) + \
+ (IS_MEDIA_GT_IP_RANGE((gt), (ipver), (ipver)) && \
+ IS_MEDIA_STEP((gt)->i915, (from), (until))))
+
#define GT_TRACE(gt, fmt, ...) do { \
const struct intel_gt *gt__ __maybe_unused = (gt); \
GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \
@@ -24,6 +87,8 @@ static inline bool gt_is_root(struct intel_gt *gt)
return !gt->info.id;
}
+bool intel_gt_needs_wa_22016122933(struct intel_gt *gt);
+
static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
{
return container_of(uc, struct intel_gt, uc);
@@ -107,16 +172,8 @@ void intel_gt_info_print(const struct intel_gt_info *info,
void intel_gt_watchdog_work(struct work_struct *work);
-static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
-{
- return seqprop_sequence(&gt->tlb.seqno);
-}
-
-static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
-{
- return intel_gt_tlb_seqno(gt) | 1;
-}
-
-void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);
+enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
+ struct drm_i915_gem_object *obj,
+ bool always_coherent);
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_defines.h b/drivers/gpu/drm/i915/gt/intel_gt_defines.h
new file mode 100644
index 000000000000..5017788bac8f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_defines.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_DEFINES__
+#define __INTEL_GT_DEFINES__
+
+#define I915_MAX_GT 2
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 62fd00c9e519..77fb57223465 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -31,7 +31,7 @@ static u32
gen11_gt_engine_identity(struct intel_gt *gt,
const unsigned int bank, const unsigned int bit)
{
- void __iomem * const regs = gt->uncore->regs;
+ void __iomem * const regs = intel_uncore_regs(gt->uncore);
u32 timeout_ts;
u32 ident;
@@ -148,7 +148,7 @@ gen11_gt_identity_handler(struct intel_gt *gt, const u32 identity)
static void
gen11_gt_bank_handler(struct intel_gt *gt, const unsigned int bank)
{
- void __iomem * const regs = gt->uncore->regs;
+ void __iomem * const regs = intel_uncore_regs(gt->uncore);
unsigned long intr_dw;
unsigned int bit;
@@ -183,7 +183,7 @@ void gen11_gt_irq_handler(struct intel_gt *gt, const u32 master_ctl)
bool gen11_gt_reset_one_iir(struct intel_gt *gt,
const unsigned int bank, const unsigned int bit)
{
- void __iomem * const regs = gt->uncore->regs;
+ void __iomem * const regs = intel_uncore_regs(gt->uncore);
u32 dw;
lockdep_assert_held(gt->irq_lock);
@@ -404,7 +404,7 @@ void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
{
- void __iomem * const regs = gt->uncore->regs;
+ void __iomem * const regs = intel_uncore_regs(gt->uncore);
u32 iir;
if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 0b414eae1683..bf4a933de03a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -4,7 +4,7 @@
*/
#include "i915_drv.h"
-
+#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
@@ -166,8 +166,8 @@ void intel_gt_mcr_init(struct intel_gt *gt)
gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table;
} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
/* Wa_14016747170 */
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK,
intel_uncore_read(gt->uncore,
MTL_GT_ACTIVITY_FACTOR));
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 357e2f865727..f900cc68d6d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -290,7 +290,6 @@ static int mtl_drpc(struct seq_file *m)
seq_puts(m, "RC6\n");
break;
default:
- MISSING_CASE(REG_FIELD_GET(MTL_CC_MASK, gt_core_status));
seq_puts(m, "Unknown\n");
break;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 718cb2c80f79..cca4bac8f8b0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -26,7 +26,7 @@
#define MTL_CAGF_MASK REG_GENMASK(8, 0)
#define MTL_CC0 0x0
#define MTL_CC6 0x3
-#define MTL_CC_MASK REG_GENMASK(12, 9)
+#define MTL_CC_MASK REG_GENMASK(10, 9)
/* RPM unit config (Gen8+) */
#define RPM_CONFIG0 _MMIO(0xd00)
@@ -164,6 +164,8 @@
#define GEN9_CSFE_CHICKEN1_RCS _MMIO(0x20d4)
#define GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2)
#define GEN11_ENABLE_32_PLANE_MODE (1 << 7)
+#define GEN12_CS_DEBUG_MODE2 _MMIO(0x20d8)
+#define INSTRUCTION_STATE_CACHE_INVALIDATE REG_BIT(6)
#define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0)
#define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1 << 14)
@@ -332,9 +334,11 @@
#define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4)
#define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4)
#define BSD_HWS_PGA_GEN7 _MMIO(0x4180)
-#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208)
-#define GEN12_VD0_AUX_NV _MMIO(0x4218)
-#define GEN12_VD1_AUX_NV _MMIO(0x4228)
+
+#define GEN12_CCS_AUX_INV _MMIO(0x4208)
+#define GEN12_VD0_AUX_INV _MMIO(0x4218)
+#define GEN12_VE0_AUX_INV _MMIO(0x4238)
+#define GEN12_BCS0_AUX_INV _MMIO(0x4248)
#define GEN8_RTCR _MMIO(0x4260)
#define GEN8_M1TCR _MMIO(0x4264)
@@ -342,14 +346,12 @@
#define GEN8_BTCR _MMIO(0x426c)
#define GEN8_VTCR _MMIO(0x4270)
-#define GEN12_VD2_AUX_NV _MMIO(0x4298)
-#define GEN12_VD3_AUX_NV _MMIO(0x42a8)
-#define GEN12_VE0_AUX_NV _MMIO(0x4238)
-
#define BLT_HWS_PGA_GEN7 _MMIO(0x4280)
-#define GEN12_VE1_AUX_NV _MMIO(0x42b8)
+#define GEN12_VD2_AUX_INV _MMIO(0x4298)
+#define GEN12_CCS0_AUX_INV _MMIO(0x42c8)
#define AUX_INV REG_BIT(0)
+
#define VEBOX_HWS_PGA_GEN7 _MMIO(0x4380)
#define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
@@ -412,9 +414,6 @@
#define XEHP_CULLBIT1 MCR_REG(0x6100)
-#define CHICKEN_RASTER_1 MCR_REG(0x6204)
-#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8)
-
#define CHICKEN_RASTER_2 MCR_REG(0x6208)
#define TBIMR_FAST_CLIP REG_BIT(5)
@@ -1221,6 +1220,8 @@
#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0)
#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11)
+#define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3)
+
#define ICL_HDC_MODE MCR_REG(0xe5f4)
#define EU_PERF_CNTL2 PERF_REG(0xe658)
@@ -1231,6 +1232,7 @@
#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
#define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4)
+#define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32)
#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index ee2b44f896a2..f0dea54880af 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -701,6 +701,80 @@ static const struct attribute *media_perf_power_attrs[] = {
};
static ssize_t
+rps_up_threshold_pct_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_rps *rps = &gt->rps;
+
+ return sysfs_emit(buf, "%u\n", intel_rps_get_up_threshold(rps));
+}
+
+static ssize_t
+rps_up_threshold_pct_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_rps *rps = &gt->rps;
+ int ret;
+ u8 val;
+
+ ret = kstrtou8(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ ret = intel_rps_set_up_threshold(rps, val);
+
+ return ret == 0 ? count : ret;
+}
+
+static struct kobj_attribute rps_up_threshold_pct =
+ __ATTR(rps_up_threshold_pct,
+ 0664,
+ rps_up_threshold_pct_show,
+ rps_up_threshold_pct_store);
+
+static ssize_t
+rps_down_threshold_pct_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_rps *rps = &gt->rps;
+
+ return sysfs_emit(buf, "%u\n", intel_rps_get_down_threshold(rps));
+}
+
+static ssize_t
+rps_down_threshold_pct_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+ struct intel_rps *rps = &gt->rps;
+ int ret;
+ u8 val;
+
+ ret = kstrtou8(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ ret = intel_rps_set_down_threshold(rps, val);
+
+ return ret == 0 ? count : ret;
+}
+
+static struct kobj_attribute rps_down_threshold_pct =
+ __ATTR(rps_down_threshold_pct,
+ 0664,
+ rps_down_threshold_pct_show,
+ rps_down_threshold_pct_store);
+
+static const struct attribute * const gen6_gt_rps_attrs[] = {
+ &rps_up_threshold_pct.attr,
+ &rps_down_threshold_pct.attr,
+ NULL
+};
+
+static ssize_t
default_min_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct intel_gt *gt = kobj_to_gt(kobj->parent);
@@ -722,9 +796,37 @@ default_max_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, cha
static struct kobj_attribute default_max_freq_mhz =
__ATTR(rps_max_freq_mhz, 0444, default_max_freq_mhz_show, NULL);
+static ssize_t
+default_rps_up_threshold_pct_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_gt *gt = kobj_to_gt(kobj->parent);
+
+ return sysfs_emit(buf, "%u\n", gt->defaults.rps_up_threshold);
+}
+
+static struct kobj_attribute default_rps_up_threshold_pct =
+__ATTR(rps_up_threshold_pct, 0444, default_rps_up_threshold_pct_show, NULL);
+
+static ssize_t
+default_rps_down_threshold_pct_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_gt *gt = kobj_to_gt(kobj->parent);
+
+ return sysfs_emit(buf, "%u\n", gt->defaults.rps_down_threshold);
+}
+
+static struct kobj_attribute default_rps_down_threshold_pct =
+__ATTR(rps_down_threshold_pct, 0444, default_rps_down_threshold_pct_show, NULL);
+
static const struct attribute * const rps_defaults_attrs[] = {
&default_min_freq_mhz.attr,
&default_max_freq_mhz.attr,
+ &default_rps_up_threshold_pct.attr,
+ &default_rps_down_threshold_pct.attr,
NULL
};
@@ -752,6 +854,12 @@ static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj)
if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
ret = sysfs_create_file(kobj, vlv_attr);
+ if (is_object_gt(kobj) && !intel_uc_uses_guc_slpc(&gt->uc)) {
+ ret = sysfs_create_files(kobj, gen6_gt_rps_attrs);
+ if (ret)
+ return ret;
+ }
+
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index f08c2556aa25..def7dd0eb6f1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -83,6 +83,9 @@ enum intel_submission_method {
struct gt_defaults {
u32 min_freq;
u32 max_freq;
+
+ u8 rps_up_threshold;
+ u8 rps_down_threshold;
};
enum intel_gt_type {
@@ -306,4 +309,6 @@ enum intel_gt_scratch_field {
INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
};
+#define intel_gt_support_legacy_fencing(gt) ((gt)->ggtt->num_fences > 0)
+
#endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 731d9f2bbc56..13944a14ea2d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -89,7 +89,7 @@ int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
enum i915_map_type type;
void *vaddr;
- type = i915_coherent_map_type(vm->i915, obj, true);
+ type = intel_gt_coherent_map_type(vm->gt, obj, true);
vaddr = i915_gem_object_pin_map_unlocked(obj, type);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -103,7 +103,7 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object
enum i915_map_type type;
void *vaddr;
- type = i915_coherent_map_type(vm->i915, obj, true);
+ type = intel_gt_coherent_map_type(vm->gt, obj, true);
vaddr = i915_gem_object_pin_map(obj, type);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 4d6296cdbcfd..346ec8ec2edd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -35,9 +35,9 @@
#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
-#define DBG(...) trace_printk(__VA_ARGS__)
+#define GTT_TRACE(...) trace_printk(__VA_ARGS__)
#else
-#define DBG(...)
+#define GTT_TRACE(...)
#endif
#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a4ec20aaafe2..eaf66d903166 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -845,6 +845,27 @@ lrc_setup_indirect_ctx(u32 *regs,
lrc_ring_indirect_offset_default(engine) << 6;
}
+static bool ctx_needs_runalone(const struct intel_context *ce)
+{
+ struct i915_gem_context *gem_ctx;
+ bool ctx_is_protected = false;
+
+ /*
+ * On MTL and newer platforms, protected contexts require setting
+ * the LRC run-alone bit or else the encryption will not happen.
+ */
+ if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) &&
+ (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) {
+ rcu_read_lock();
+ gem_ctx = rcu_dereference(ce->gem_context);
+ if (gem_ctx)
+ ctx_is_protected = gem_ctx->uses_protected_content;
+ rcu_read_unlock();
+ }
+
+ return ctx_is_protected;
+}
+
static void init_common_regs(u32 * const regs,
const struct intel_context *ce,
const struct intel_engine_cs *engine,
@@ -860,6 +881,8 @@ static void init_common_regs(u32 * const regs,
if (GRAPHICS_VER(engine->i915) < 11)
ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
+ if (ctx_needs_runalone(ce))
+ ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE);
regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_TIMESTAMP] = ce->stats.runtime.last;
@@ -1092,10 +1115,19 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
obj = i915_gem_object_create_lmem(engine->i915, context_size,
I915_BO_ALLOC_PM_VOLATILE);
- if (IS_ERR(obj))
+ if (IS_ERR(obj)) {
obj = i915_gem_object_create_shmem(engine->i915, context_size);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ /*
+ * Wa_22016122933: For Media version 13.0, all Media GT shared
+ * memory needs to be mapped as WC on CPU side and UC (PAT
+ * index 2) on GPU side.
+ */
+ if (intel_gt_needs_wa_22016122933(engine->gt))
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+ }
vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
@@ -1184,9 +1216,9 @@ lrc_pre_pin(struct intel_context *ce,
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
*vaddr = i915_gem_object_pin_map(ce->state->obj,
- i915_coherent_map_type(ce->engine->i915,
- ce->state->obj,
- false) |
+ intel_gt_coherent_map_type(ce->engine->gt,
+ ce->state->obj,
+ false) |
I915_MAP_OVERRIDE);
return PTR_ERR_OR_ZERO(*vaddr);
@@ -1308,29 +1340,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
}
/*
- * On DG2 during context restore of a preempted context in GPGPU mode,
- * RCS restore hang is detected. This is extremely timing dependent.
- * To address this below sw wabb is implemented for DG2 A steppings.
- */
-static u32 *
-dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
-{
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base));
- *cs++ = 0x21;
-
- *cs++ = MI_LOAD_REGISTER_REG;
- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1);
-
- *cs++ = MI_LOAD_REGISTER_REG;
- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2);
-
- return cs;
-}
-
-/*
* The bspec's tuning guide asks us to program a vertical watermark value of
* 0x3FF. However this register is not saved/restored properly by the
* hardware, so we're required to apply the desired value via INDIRECT_CTX
@@ -1348,30 +1357,34 @@ dg2_emit_draw_watermark_setting(u32 *cs)
}
static u32 *
+gen12_invalidate_state_cache(u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
+ *cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
+ return cs;
+}
+
+static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
cs = gen12_emit_timestamp_wa(ce, cs);
cs = gen12_emit_cmd_buf_wa(ce, cs);
cs = gen12_emit_restore_scratch(ce, cs);
- /* Wa_22011450934:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0))
- cs = dg2_emit_rcs_hang_wabb(ce, cs);
-
/* Wa_16013000631:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(ce->engine->i915))
+ if (IS_DG2_G11(ce->engine->i915))
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
- /* hsdes: 1809175790 */
- if (!HAS_FLAT_CCS(ce->engine->i915))
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_GFX_CCS_AUX_NV);
+ cs = gen12_emit_aux_table_inv(ce->engine, cs);
+
+ /* Wa_18022495364 */
+ if (IS_GFX_GT_IP_RANGE(ce->engine->gt, IP_VER(12, 0), IP_VER(12, 10)))
+ cs = gen12_invalidate_state_cache(cs);
/* Wa_16014892111 */
- if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
IS_DG2(ce->engine->i915))
cs = dg2_emit_draw_watermark_setting(cs);
@@ -1385,24 +1398,13 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
cs = gen12_emit_restore_scratch(ce, cs);
/* Wa_16013000631:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(ce->engine->i915))
+ if (IS_DG2_G11(ce->engine->i915))
if (ce->engine->class == COMPUTE_CLASS)
cs = gen8_emit_pipe_control(cs,
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
0);
- /* hsdes: 1809175790 */
- if (!HAS_FLAT_CCS(ce->engine->i915)) {
- if (ce->engine->class == VIDEO_DECODE_CLASS)
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_VD0_AUX_NV);
- else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_VE0_AUX_NV);
- }
-
- return cs;
+ return gen12_emit_aux_table_inv(ce->engine, cs);
}
static void
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 6023288b0e2d..576e5ef0289b 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -366,7 +366,7 @@ static int emit_pte(struct i915_request *rq,
u64 offset,
int length)
{
- bool has_64K_pages = HAS_64K_PAGES(rq->engine->i915);
+ bool has_64K_pages = HAS_64K_PAGES(rq->i915);
const u64 encode = rq->context->vm->pte_encode(0, pat_index,
is_lmem ? PTE_LM : 0);
struct intel_ring *ring = rq->ring;
@@ -375,7 +375,7 @@ static int emit_pte(struct i915_request *rq,
u32 page_size;
u32 *hdr, *cs;
- GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8);
+ GEM_BUG_ON(GRAPHICS_VER(rq->i915) < 8);
page_size = I915_GTT_PAGE_SIZE;
dword_length = 0x400;
@@ -531,7 +531,7 @@ static int emit_copy_ccs(struct i915_request *rq,
u32 dst_offset, u8 dst_access,
u32 src_offset, u8 src_access, int size)
{
- struct drm_i915_private *i915 = rq->engine->i915;
+ struct drm_i915_private *i915 = rq->i915;
int mocs = rq->engine->gt->mocs.uc_index << 1;
u32 num_ccs_blks;
u32 *cs;
@@ -581,7 +581,7 @@ static int emit_copy_ccs(struct i915_request *rq,
static int emit_copy(struct i915_request *rq,
u32 dst_offset, u32 src_offset, int size)
{
- const int ver = GRAPHICS_VER(rq->engine->i915);
+ const int ver = GRAPHICS_VER(rq->i915);
u32 instance = rq->engine->instance;
u32 *cs;
@@ -917,7 +917,7 @@ out_ce:
static int emit_clear(struct i915_request *rq, u32 offset, int size,
u32 value, bool is_lmem)
{
- struct drm_i915_private *i915 = rq->engine->i915;
+ struct drm_i915_private *i915 = rq->i915;
int mocs = rq->engine->gt->mocs.uc_index << 1;
const int ver = GRAPHICS_VER(i915);
int ring_sz;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 2c014407225c..07269ff3be13 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -404,18 +404,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = {
MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
};
-static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = {
- /* Wa_14011441408: Set Go to Memory for MOCS#0 */
- MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Coherent; GO:Memory */
- MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Non-Coherent; GO:Memory */
- MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
-
- /* WB - LC */
- MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
-};
-
static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
/* Error */
MOCS_ENTRY(0, 0, L3_3_WB),
@@ -507,7 +495,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
memset(table, 0, sizeof(struct drm_i915_mocs_table));
table->unused_entries_index = I915_MOCS_PTE;
- if (IS_METEORLAKE(i915)) {
+ if (IS_GFX_GT_IP_RANGE(&i915->gt0, IP_VER(12, 70), IP_VER(12, 71))) {
table->size = ARRAY_SIZE(mtl_mocs_table);
table->table = mtl_mocs_table;
table->n_entries = MTL_NUM_MOCS_ENTRIES;
@@ -521,13 +509,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
table->wb_index = 2;
table->unused_entries_index = 2;
} else if (IS_DG2(i915)) {
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
- table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
- table->table = dg2_mocs_table_g10_ax;
- } else {
- table->size = ARRAY_SIZE(dg2_mocs_table);
- table->table = dg2_mocs_table;
- }
+ table->size = ARRAY_SIZE(dg2_mocs_table);
+ table->table = dg2_mocs_table;
table->uc_index = 1;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->unused_entries_index = 3;
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index 436756bfbb1a..d07a4f97b943 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -8,6 +8,7 @@
#include "gem/i915_gem_lmem.h"
#include "i915_trace.h"
+#include "intel_gt.h"
#include "intel_gtt.h"
#include "gen6_ppgtt.h"
#include "gen8_ppgtt.h"
@@ -210,8 +211,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm,
return;
vm->clear_range(vm, vma_res->start, vma_res->vma_size);
- if (vma_res->tlb)
- vma_invalidate_tlb(vm, vma_res->tlb);
+ vma_invalidate_tlb(vm, vma_res->tlb);
}
static unsigned long pd_count(u64 size, int shift)
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 58bb1c55294c..8b67abd720be 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -118,14 +118,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
GEN6_RC_CTL_EI_MODE(1);
/*
- * Wa_16011777198 and BSpec 52698 - Render powergating must be off.
+ * BSpec 52698 - Render powergating must be off.
* FIXME BSpec is outdated, disabling powergating for MTL is just
* temporary wa and should be removed after fixing real cause
* of forcewake timeouts.
*/
- if (IS_METEORLAKE(gt->i915) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
pg_enable =
GEN9_MEDIA_PG_ENABLE |
GEN11_MEDIA_SAMPLER_PG_ENABLE;
@@ -526,8 +524,7 @@ static bool rc6_supported(struct intel_rc6 *rc6)
return false;
}
- if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) &&
- gt->type == GT_MEDIA) {
+ if (IS_MEDIA_GT_IP_STEP(gt, IP_VER(13, 0), STEP_A0, STEP_B0)) {
drm_notice(&i915->drm,
"Media RC6 disabled on A step\n");
return false;
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index 2a3217e2890f..f8512aee58a8 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -220,7 +220,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
resource_size_t lmem_range;
u64 tile_stolen, flat_ccs_base;
- lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0xFFFF;
+ lmem_range = intel_gt_mcr_read_any(to_gt(i915), XEHP_TILE0_ADDR_RANGE) & 0xFFFF;
lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT;
lmem_size *= SZ_1G;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index e2152f75ba2e..a21e939fdbf6 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -35,9 +35,6 @@
#define RESET_MAX_RETRIES 3
-/* XXX How to handle concurrent GGTT updates using tiling registers? */
-#define RESET_UNDER_STOP_MACHINE 0
-
static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{
struct drm_i915_file_private *file_priv = ctx->file_priv;
@@ -164,16 +161,16 @@ static int i915_do_reset(struct intel_gt *gt,
struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev);
int err;
- /* Assert reset for at least 20 usec, and wait for acknowledgement. */
+ /* Assert reset for at least 50 usec, and wait for acknowledgement. */
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
udelay(50);
- err = wait_for_atomic(i915_in_reset(pdev), 50);
+ err = _wait_for_atomic(i915_in_reset(pdev), 50000, 0);
/* Clear the reset request. */
pci_write_config_byte(pdev, I915_GDRST, 0);
udelay(50);
if (!err)
- err = wait_for_atomic(!i915_in_reset(pdev), 50);
+ err = _wait_for_atomic(!i915_in_reset(pdev), 50000, 0);
return err;
}
@@ -193,7 +190,7 @@ static int g33_do_reset(struct intel_gt *gt,
struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev);
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
- return wait_for_atomic(g4x_reset_complete(pdev), 50);
+ return _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
}
static int g4x_do_reset(struct intel_gt *gt,
@@ -210,7 +207,7 @@ static int g4x_do_reset(struct intel_gt *gt,
pci_write_config_byte(pdev, I915_GDRST,
GRDOM_MEDIA | GRDOM_RESET_ENABLE);
- ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
+ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
if (ret) {
GT_TRACE(gt, "Wait for media reset failed\n");
goto out;
@@ -218,7 +215,7 @@ static int g4x_do_reset(struct intel_gt *gt,
pci_write_config_byte(pdev, I915_GDRST,
GRDOM_RENDER | GRDOM_RESET_ENABLE);
- ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
+ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
if (ret) {
GT_TRACE(gt, "Wait for render reset failed\n");
goto out;
@@ -708,7 +705,7 @@ static int __reset_guc(struct intel_gt *gt)
static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
- if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0))
+ if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0))
return false;
if (!__HAS_ENGINE(engine_mask, GSC0))
@@ -788,9 +785,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
reset_mask = wa_14015076503_start(gt, engine_mask, !retry);
GT_TRACE(gt, "engine_mask=%x\n", reset_mask);
- preempt_disable();
ret = reset(gt, reset_mask, retry);
- preempt_enable();
wa_14015076503_end(gt, reset_mask);
}
@@ -1635,6 +1630,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
w->gt = NULL;
}
+/*
+ * Wa_22011802037 requires that we (or the GuC) ensure that no command
+ * streamers are executing MI_FORCE_WAKE while an engine reset is initiated.
+ */
+bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt)
+{
+ if (GRAPHICS_VER(gt->i915) < 11)
+ return false;
+
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0))
+ return true;
+
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+ return false;
+
+ return true;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
#include "selftest_hangcheck.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 25c975b6e8fc..f615b30b81c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -78,4 +78,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w);
bool intel_has_gpu_reset(const struct intel_gt *gt);
bool intel_has_reset_engine(const struct intel_gt *gt);
+bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt);
+
#endif /* I915_RESET_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index fb99143be98e..59da4b7bd262 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -13,6 +13,7 @@
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_ring.h"
+#include "intel_gt.h"
#include "intel_timeline.h"
unsigned int intel_ring_update_space(struct intel_ring *ring)
@@ -56,7 +57,7 @@ int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) {
addr = (void __force *)i915_vma_pin_iomap(vma);
} else {
- int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false);
+ int type = intel_gt_coherent_map_type(vma->vm->gt, vma->obj, false);
addr = i915_gem_object_pin_map(vma->obj, type);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 3fd795c3263f..92085ffd23de 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -805,7 +805,7 @@ static int mi_set_context(struct i915_request *rq,
static int remap_l3_slice(struct i915_request *rq, int slice)
{
#define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32))
- u32 *cs, *remap_info = rq->engine->i915->l3_parity.remap_info[slice];
+ u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
int i;
if (!remap_info)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index e92e626d4994..4feef874e6d6 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -16,7 +16,9 @@
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
+#include "intel_gt_pm.h"
#include "intel_gt_pm_irq.h"
+#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_mchbar_regs.h"
#include "intel_pcode.h"
@@ -672,7 +674,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
{
struct intel_gt *gt = rps_to_gt(rps);
struct intel_uncore *uncore = gt->uncore;
- u32 threshold_up = 0, threshold_down = 0; /* in % */
u32 ei_up = 0, ei_down = 0;
lockdep_assert_held(&rps->power.mutex);
@@ -680,9 +681,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
if (new_power == rps->power.mode)
return;
- threshold_up = 95;
- threshold_down = 85;
-
/* Note the units here are not exactly 1us, but 1280ns. */
switch (new_power) {
case LOW_POWER:
@@ -709,17 +707,22 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
GT_TRACE(gt,
"changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
- new_power, threshold_up, ei_up, threshold_down, ei_down);
+ new_power,
+ rps->power.up_threshold, ei_up,
+ rps->power.down_threshold, ei_down);
set(uncore, GEN6_RP_UP_EI,
intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
set(uncore, GEN6_RP_UP_THRESHOLD,
- intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
+ intel_gt_ns_to_pm_interval(gt,
+ ei_up * rps->power.up_threshold * 10));
set(uncore, GEN6_RP_DOWN_EI,
intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
set(uncore, GEN6_RP_DOWN_THRESHOLD,
- intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
+ intel_gt_ns_to_pm_interval(gt,
+ ei_down *
+ rps->power.down_threshold * 10));
set(uncore, GEN6_RP_CONTROL,
(GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
@@ -731,8 +734,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
skip_hw_write:
rps->power.mode = new_power;
- rps->power.up_threshold = threshold_up;
- rps->power.down_threshold = threshold_down;
}
static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
@@ -1160,7 +1161,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- if (IS_METEORLAKE(i915))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
return mtl_get_freq_caps(rps, caps);
else
return __gen6_rps_get_freq_caps(rps, caps);
@@ -1559,10 +1560,12 @@ void intel_rps_enable(struct intel_rps *rps)
return;
GT_TRACE(rps_to_gt(rps),
- "min:%x, max:%x, freq:[%d, %d]\n",
+ "min:%x, max:%x, freq:[%d, %d], thresholds:[%u, %u]\n",
rps->min_freq, rps->max_freq,
intel_gpu_freq(rps, rps->min_freq),
- intel_gpu_freq(rps, rps->max_freq));
+ intel_gpu_freq(rps, rps->max_freq),
+ rps->power.up_threshold,
+ rps->power.down_threshold);
GEM_BUG_ON(rps->max_freq < rps->min_freq);
GEM_BUG_ON(rps->idle_freq > rps->max_freq);
@@ -2015,6 +2018,12 @@ void intel_rps_init(struct intel_rps *rps)
}
}
+ /* Set default thresholds in % */
+ rps->power.up_threshold = 95;
+ rps_to_gt(rps)->defaults.rps_up_threshold = rps->power.up_threshold;
+ rps->power.down_threshold = 85;
+ rps_to_gt(rps)->defaults.rps_down_threshold = rps->power.down_threshold;
+
/* Finally allow us to boost to max by default */
rps->boost_freq = rps->max_freq;
rps->idle_freq = rps->min_freq;
@@ -2569,6 +2578,58 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
return set_min_freq(rps, val);
}
+u8 intel_rps_get_up_threshold(struct intel_rps *rps)
+{
+ return rps->power.up_threshold;
+}
+
+static int rps_set_threshold(struct intel_rps *rps, u8 *threshold, u8 val)
+{
+ int ret;
+
+ if (val > 100)
+ return -EINVAL;
+
+ ret = mutex_lock_interruptible(&rps->lock);
+ if (ret)
+ return ret;
+
+ if (*threshold == val)
+ goto out_unlock;
+
+ *threshold = val;
+
+ /* Force reset. */
+ rps->last_freq = -1;
+ mutex_lock(&rps->power.mutex);
+ rps->power.mode = -1;
+ mutex_unlock(&rps->power.mutex);
+
+ intel_rps_set(rps, clamp(rps->cur_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit));
+
+out_unlock:
+ mutex_unlock(&rps->lock);
+
+ return ret;
+}
+
+int intel_rps_set_up_threshold(struct intel_rps *rps, u8 threshold)
+{
+ return rps_set_threshold(rps, &rps->power.up_threshold, threshold);
+}
+
+u8 intel_rps_get_down_threshold(struct intel_rps *rps)
+{
+ return rps->power.down_threshold;
+}
+
+int intel_rps_set_down_threshold(struct intel_rps *rps, u8 threshold)
+{
+ return rps_set_threshold(rps, &rps->power.down_threshold, threshold);
+}
+
static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index a3fa987aa91f..92fb01f5a452 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -37,6 +37,10 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive);
int intel_gpu_freq(struct intel_rps *rps, int val);
int intel_freq_opcode(struct intel_rps *rps, int val);
+u8 intel_rps_get_up_threshold(struct intel_rps *rps);
+int intel_rps_set_up_threshold(struct intel_rps *rps, u8 threshold);
+u8 intel_rps_get_down_threshold(struct intel_rps *rps);
+int intel_rps_set_down_threshold(struct intel_rps *rps, u8 threshold);
u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps);
u32 intel_rps_get_requested_frequency(struct intel_rps *rps);
diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c b/drivers/gpu/drm/i915/gt/intel_sa_media.c
index e8f3d18c12b8..8c1dbcbcbc4f 100644
--- a/drivers/gpu/drm/i915/gt/intel_sa_media.c
+++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c
@@ -29,7 +29,7 @@ int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr,
* Standalone media shares the general MMIO space with the primary
* GT. We'll re-use the primary GT's mapping.
*/
- uncore->regs = i915->uncore.regs;
+ uncore->regs = intel_uncore_regs(&i915->uncore);
if (drm_WARN_ON(&i915->drm, uncore->regs == NULL))
return -EIO;
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 1141f875f5bd..f602895f6d0d 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -302,7 +302,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt)
u8 eu_en;
u8 s_en;
- if (IS_JSL_EHL(gt->i915))
+ if (IS_JASPERLAKE(gt->i915) || IS_ELKHARTLAKE(gt->i915))
intel_sseu_set_info(sseu, 1, 4, 8);
else
intel_sseu_set_info(sseu, 1, 8, 8);
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
new file mode 100644
index 000000000000..139608c30d97
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_perf_oa_regs.h"
+#include "intel_engine_pm.h"
+#include "intel_gt.h"
+#include "intel_gt_mcr.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_print.h"
+#include "intel_gt_regs.h"
+#include "intel_tlb.h"
+
+/*
+ * HW architecture suggest typical invalidation time at 40us,
+ * with pessimistic cases up to 100us and a recommendation to
+ * cap at 1ms. We go a bit higher just in case.
+ */
+#define TLB_INVAL_TIMEOUT_US 100
+#define TLB_INVAL_TIMEOUT_MS 4
+
+/*
+ * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
+ * but are now considered MCR registers. Since they exist within a GAM range,
+ * the primary instance of the register rolls up the status from each unit.
+ */
+static int wait_for_invalidate(struct intel_engine_cs *engine)
+{
+ if (engine->tlb_inv.mcr)
+ return intel_gt_mcr_wait_for_reg(engine->gt,
+ engine->tlb_inv.reg.mcr_reg,
+ engine->tlb_inv.done,
+ 0,
+ TLB_INVAL_TIMEOUT_US,
+ TLB_INVAL_TIMEOUT_MS);
+ else
+ return __intel_wait_for_register_fw(engine->gt->uncore,
+ engine->tlb_inv.reg.reg,
+ engine->tlb_inv.done,
+ 0,
+ TLB_INVAL_TIMEOUT_US,
+ TLB_INVAL_TIMEOUT_MS,
+ NULL);
+}
+
+static void mmio_invalidate_full(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_engine_cs *engine;
+ intel_engine_mask_t awake, tmp;
+ enum intel_engine_id id;
+ unsigned long flags;
+
+ if (GRAPHICS_VER(i915) < 8)
+ return;
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ intel_gt_mcr_lock(gt, &flags);
+ spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
+
+ awake = 0;
+ for_each_engine(engine, gt, id) {
+ if (!intel_engine_pm_is_awake(engine))
+ continue;
+
+ if (engine->tlb_inv.mcr)
+ intel_gt_mcr_multicast_write_fw(gt,
+ engine->tlb_inv.reg.mcr_reg,
+ engine->tlb_inv.request);
+ else
+ intel_uncore_write_fw(uncore,
+ engine->tlb_inv.reg.reg,
+ engine->tlb_inv.request);
+
+ awake |= engine->mask;
+ }
+
+ GT_TRACE(gt, "invalidated engines %08x\n", awake);
+
+ /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
+ if (awake &&
+ (IS_TIGERLAKE(i915) ||
+ IS_DG1(i915) ||
+ IS_ROCKETLAKE(i915) ||
+ IS_ALDERLAKE_S(i915) ||
+ IS_ALDERLAKE_P(i915)))
+ intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
+
+ spin_unlock(&uncore->lock);
+ intel_gt_mcr_unlock(gt, flags);
+
+ for_each_engine_masked(engine, gt, awake, tmp) {
+ if (wait_for_invalidate(engine))
+ gt_err_ratelimited(gt,
+ "%s TLB invalidation did not complete in %ums!\n",
+ engine->name, TLB_INVAL_TIMEOUT_MS);
+ }
+
+ /*
+ * Use delayed put since a) we mostly expect a flurry of TLB
+ * invalidations so it is good to avoid paying the forcewake cost and
+ * b) it works around a bug in Icelake which cannot cope with too rapid
+ * transitions.
+ */
+ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+}
+
+static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
+{
+ u32 cur = intel_gt_tlb_seqno(gt);
+
+ /* Only skip if a *full* TLB invalidate barrier has passed */
+ return (s32)(cur - ALIGN(seqno, 2)) > 0;
+}
+
+void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
+{
+ intel_wakeref_t wakeref;
+
+ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+ return;
+
+ if (intel_gt_is_wedged(gt))
+ return;
+
+ if (tlb_seqno_passed(gt, seqno))
+ return;
+
+ with_intel_gt_pm_if_awake(gt, wakeref) {
+ mutex_lock(&gt->tlb.invalidate_lock);
+ if (tlb_seqno_passed(gt, seqno))
+ goto unlock;
+
+ mmio_invalidate_full(gt);
+
+ write_seqcount_invalidate(&gt->tlb.seqno);
+unlock:
+ mutex_unlock(&gt->tlb.invalidate_lock);
+ }
+}
+
+void intel_gt_init_tlb(struct intel_gt *gt)
+{
+ mutex_init(&gt->tlb.invalidate_lock);
+ seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
+}
+
+void intel_gt_fini_tlb(struct intel_gt *gt)
+{
+ mutex_destroy(&gt->tlb.invalidate_lock);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_tlb.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.h b/drivers/gpu/drm/i915/gt/intel_tlb.h
new file mode 100644
index 000000000000..337327af92ac
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef INTEL_TLB_H
+#define INTEL_TLB_H
+
+#include <linux/seqlock.h>
+#include <linux/types.h>
+
+#include "intel_gt_types.h"
+
+void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno);
+
+void intel_gt_init_tlb(struct intel_gt *gt);
+void intel_gt_fini_tlb(struct intel_gt *gt);
+
+static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
+{
+ return seqprop_sequence(&gt->tlb.seqno);
+}
+
+static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
+{
+ return intel_gt_tlb_seqno(gt) | 1;
+}
+
+#endif /* INTEL_TLB_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 4d2dece96011..b86a10b1f534 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -123,6 +123,22 @@ static void wa_init_finish(struct i915_wa_list *wal)
wal->wa_count, wal->name, wal->engine_name);
}
+static enum forcewake_domains
+wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
+{
+ enum forcewake_domains fw = 0;
+ struct i915_wa *wa;
+ unsigned int i;
+
+ for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+ fw |= intel_uncore_forcewake_for_reg(uncore,
+ wa->reg,
+ FW_REG_READ |
+ FW_REG_WRITE);
+
+ return fw;
+}
+
static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
unsigned int addr = i915_mmio_reg_offset(wa->reg);
@@ -225,13 +241,13 @@ static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
static void
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
- wa_add(wal, reg, clear, set, clear, false);
+ wa_add(wal, reg, clear, set, clear | set, false);
}
static void
wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
{
- wa_mcr_add(wal, reg, clear, set, clear, false);
+ wa_mcr_add(wal, reg, clear, set, clear | set, false);
}
static void
@@ -404,7 +420,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
/* WaForceContextSaveRestoreNonCoherent:bdw */
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
/* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
- (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
+ (IS_BROADWELL_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}
static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -584,7 +600,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
gen9_ctx_workarounds_init(engine, wal);
/* WaToEnableHwFixForPushConstHWBug:kbl */
- if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
+ if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
@@ -621,10 +637,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
/* Wa_1406697149 (WaDisableBankHangMode:icl) */
- wa_write(wal,
- GEN8_L3CNTLREG,
- intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
- GEN8_ERRDETBCTRL);
+ wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL);
/* WaForceEnableNonCoherent:icl
* This is not the same workaround as in early Gen9 platforms, where
@@ -653,7 +666,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
/* Wa_1604278689:icl,ehl */
wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
- 0, /* write-only register; skip validation */
+ 0,
0xFFFFFFFF);
/* Wa_1406306137:icl,ehl */
@@ -670,38 +683,8 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
- wa_mcr_add(wal,
- XEHP_FF_MODE2,
- FF_MODE2_TDS_TIMER_MASK,
- FF_MODE2_TDS_TIMER_128,
- 0, false);
-}
-
-/*
- * These settings aren't actually workarounds, but general tuning settings that
- * need to be programmed on several platforms.
- */
-static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
- struct i915_wa_list *wal)
-{
- /*
- * Although some platforms refer to it as Wa_1604555607, we need to
- * program it even on those that don't explicitly list that
- * workaround.
- *
- * Note that the programming of this register is further modified
- * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
- * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
- * value when read. The default value for this register is zero for all
- * fields and there are no bit masks. So instead of doing a RMW we
- * should just write TDS timer value. For the same reason read
- * verification is ignored.
- */
- wa_add(wal,
- GEN12_FF_MODE2,
- FF_MODE2_TDS_TIMER_MASK,
- FF_MODE2_TDS_TIMER_128,
- 0, false);
+ wa_mcr_write_clr_set(wal, XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
+ FF_MODE2_TDS_TIMER_128);
}
static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -709,8 +692,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
{
struct drm_i915_private *i915 = engine->i915;
- gen12_ctx_gt_tuning_init(engine, wal);
-
/*
* Wa_1409142259:tgl,dg1,adl-p
* Wa_1409347922:tgl,dg1,adl-p
@@ -732,15 +713,27 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
/*
- * Wa_16011163337
+ * Wa_16011163337 - GS_TIMER
+ *
+ * TDS_TIMER: Although some platforms refer to it as Wa_1604555607, we
+ * need to program it even on those that don't explicitly list that
+ * workaround.
*
- * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
- * to Wa_1608008084.
+ * Note that the programming of GEN12_FF_MODE2 is further modified
+ * according to the FF_MODE2 guidance given by Wa_1608008084.
+ * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
+ * value when read from the CPU.
+ *
+ * The default value for this register is zero for all fields.
+ * So instead of doing a RMW we should just write the desired values
+ * for TDS and GS timers. Note that since the readback can't be trusted,
+ * the clear mask is just set to ~0 to make sure other bits are not
+ * inadvertently set. For the same reason read verification is ignored.
*/
wa_add(wal,
GEN12_FF_MODE2,
- FF_MODE2_GS_TIMER_MASK,
- FF_MODE2_GS_TIMER_224,
+ ~0,
+ FF_MODE2_TDS_TIMER_128 | FF_MODE2_GS_TIMER_224,
0, false);
if (!IS_DG1(i915)) {
@@ -771,68 +764,41 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
{
dg2_ctx_gt_tuning_init(engine, wal);
- /* Wa_16011186671:dg2_g11 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
- wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
- wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
- }
-
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
- /* Wa_14010469329:dg2_g10 */
- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
- XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
-
- /*
- * Wa_22010465075:dg2_g10
- * Wa_22010613112:dg2_g10
- * Wa_14010698770:dg2_g10
- */
- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
- GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
- }
-
/* Wa_16013271637:dg2 */
wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
/* Wa_14014947963:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
- wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
+ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
/* Wa_18018764978:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) ||
- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
- wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
-
- /* Wa_15010599737:dg2 */
- wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
+ wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
/* Wa_18019271663:dg2 */
wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
}
-static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine,
- struct i915_wa_list *wal)
+static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
dg2_ctx_gt_tuning_init(engine, wal);
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
}
-static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine,
- struct i915_wa_list *wal)
+static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
- mtl_ctx_gt_tuning_init(engine, wal);
+ xelpg_ctx_gt_tuning_init(engine, wal);
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_14014947963 */
wa_masked_field_set(wal, VF_PREEMPTION,
PREEMPTION_VERTEX_COUNT, 0x4000);
@@ -938,8 +904,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
if (engine->class != RENDER_CLASS)
goto done;
- if (IS_METEORLAKE(i915))
- mtl_ctx_workarounds_init(engine, wal);
+ if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
+ xelpg_ctx_workarounds_init(engine, wal);
else if (IS_PONTEVECCHIO(i915))
; /* noop; none at this time */
else if (IS_DG2(i915))
@@ -987,6 +953,9 @@ void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
+ struct intel_uncore *uncore = rq->engine->uncore;
+ enum forcewake_domains fw;
+ unsigned long flags;
struct i915_wa *wa;
unsigned int i;
u32 *cs;
@@ -1003,13 +972,36 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
+ fw = wal_get_fw_for_rmw(uncore, wal);
+
+ intel_gt_mcr_lock(wal->gt, &flags);
+ spin_lock(&uncore->lock);
+ intel_uncore_forcewake_get__locked(uncore, fw);
+
*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+ u32 val;
+
+ /* Skip reading the register if it's not really needed */
+ if (wa->masked_reg || (wa->clr | wa->set) == U32_MAX) {
+ val = wa->set;
+ } else {
+ val = wa->is_mcr ?
+ intel_gt_mcr_read_any_fw(wal->gt, wa->mcr_reg) :
+ intel_uncore_read_fw(uncore, wa->reg);
+ val &= ~wa->clr;
+ val |= wa->set;
+ }
+
*cs++ = i915_mmio_reg_offset(wa->reg);
- *cs++ = wa->set;
+ *cs++ = val;
}
*cs++ = MI_NOOP;
+ intel_uncore_forcewake_put__locked(uncore, fw);
+ spin_unlock(&uncore->lock);
+ intel_gt_mcr_unlock(wal->gt, flags);
+
intel_ring_advance(rq, cs);
ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
@@ -1173,7 +1165,7 @@ skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
/* WaInPlaceDecompressionHang:skl */
- if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
+ if (IS_SKYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
wa_write_or(wal,
GEN9_GAMT_ECO_REG_RW_IA,
GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
@@ -1185,7 +1177,7 @@ kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
gen9_gt_workarounds_init(gt, wal);
/* WaDisableDynamicCreditSharing:kbl */
- if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
+ if (IS_KABYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
wa_write_or(wal,
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
@@ -1441,7 +1433,8 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
/* Wa_1607087056:icl,ehl,jsl */
if (IS_ICELAKE(i915) ||
- IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
+ ((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) &&
+ IS_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)))
wa_write_or(wal,
GEN11_SLICE_UNIT_LEVEL_CLKGATE,
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
@@ -1485,6 +1478,18 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
+
+ /*
+ * Wa_14015795083
+ *
+ * Firmware on some gen12 platforms locks the MISCCPCTL register,
+ * preventing i915 from modifying it for this workaround. Skip the
+ * readback verification for this workaround on debug builds; if the
+ * workaround doesn't stick due to firmware behavior, it's not an error
+ * that we want CI to flag.
+ */
+ wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
+ 0, 0, false);
}
static void
@@ -1574,31 +1579,11 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
static void
dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- struct intel_engine_cs *engine;
- int id;
-
xehp_init_mcr(gt, wal);
/* Wa_14011060649:dg2 */
wa_14011060649(gt, wal);
- /*
- * Although there are per-engine instances of these registers,
- * they technically exist outside the engine itself and are not
- * impacted by engine resets. Furthermore, they're part of the
- * GuC blacklist so trying to treat them as engine workarounds
- * will result in GuC initialization failure and a wedged GPU.
- */
- for_each_engine(engine, gt, id) {
- if (engine->class != VIDEO_DECODE_CLASS)
- continue;
-
- /* Wa_16010515920:dg2_g10 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
- wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
- ALNUNIT_CLKGATE_DIS);
- }
-
if (IS_DG2_G10(gt->i915)) {
/* Wa_22010523718:dg2 */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
@@ -1609,70 +1594,15 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
DSS_ROUTER_CLKGATE_DIS);
}
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14012362059:dg2 */
- wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
- }
-
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
- /* Wa_14010948348:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
-
- /* Wa_14011037102:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
-
- /* Wa_14011371254:dg2_g10 */
- wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
-
- /* Wa_14011431319:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
- GAMTLBVDBOX7_CLKGATE_DIS |
- GAMTLBVDBOX6_CLKGATE_DIS |
- GAMTLBVDBOX5_CLKGATE_DIS |
- GAMTLBVDBOX4_CLKGATE_DIS |
- GAMTLBVDBOX3_CLKGATE_DIS |
- GAMTLBVDBOX2_CLKGATE_DIS |
- GAMTLBVDBOX1_CLKGATE_DIS |
- GAMTLBVDBOX0_CLKGATE_DIS |
- GAMTLBKCR_CLKGATE_DIS |
- GAMTLBGUC_CLKGATE_DIS |
- GAMTLBBLT_CLKGATE_DIS);
- wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
- GAMTLBGFXA1_CLKGATE_DIS |
- GAMTLBCOMPA0_CLKGATE_DIS |
- GAMTLBCOMPA1_CLKGATE_DIS |
- GAMTLBCOMPB0_CLKGATE_DIS |
- GAMTLBCOMPB1_CLKGATE_DIS |
- GAMTLBCOMPC0_CLKGATE_DIS |
- GAMTLBCOMPC1_CLKGATE_DIS |
- GAMTLBCOMPD0_CLKGATE_DIS |
- GAMTLBCOMPD1_CLKGATE_DIS |
- GAMTLBMERT_CLKGATE_DIS |
- GAMTLBVEBOX3_CLKGATE_DIS |
- GAMTLBVEBOX2_CLKGATE_DIS |
- GAMTLBVEBOX1_CLKGATE_DIS |
- GAMTLBVEBOX0_CLKGATE_DIS);
-
- /* Wa_14010569222:dg2_g10 */
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
- GAMEDIA_CLKGATE_DIS);
-
- /* Wa_14011028019:dg2_g10 */
- wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
-
- /* Wa_14010680813:dg2_g10 */
- wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL,
- CONTROL_BLOCK_CLKGATE_DIS |
- EGRESS_BLOCK_CLKGATE_DIS |
- TAG_BLOCK_CLKGATE_DIS);
- }
-
/* Wa_14014830051:dg2 */
wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
- /* Wa_14015795083 */
- wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+ /*
+ * Wa_14015795083
+ * Skip verification for possibly locked register.
+ */
+ wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
+ 0, 0, false);
/* Wa_18018781329 */
wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
@@ -1710,14 +1640,13 @@ static void
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* Wa_14018778641 / Wa_18018781329 */
- wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
/* Wa_22016670082 */
wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_14014830051 */
wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
@@ -1743,8 +1672,6 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
* GT, the media GT's versions are regular singleton registers.
*/
wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
debug_dump_steering(gt);
}
@@ -1762,10 +1689,8 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
*/
static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
{
- if (IS_METEORLAKE(gt->i915)) {
- if (gt->type != GT_MEDIA)
- wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
-
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) {
+ wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
}
@@ -1789,15 +1714,15 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
gt_tuning_settings(gt, wal);
if (gt->type == GT_MEDIA) {
- if (MEDIA_VER(i915) >= 13)
+ if (MEDIA_VER_FULL(i915) == IP_VER(13, 0))
xelpmp_gt_workarounds_init(gt, wal);
else
- MISSING_CASE(MEDIA_VER(i915));
+ MISSING_CASE(MEDIA_VER_FULL(i915));
return;
}
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
xelpg_gt_workarounds_init(gt, wal);
else if (IS_PONTEVECCHIO(i915))
pvc_gt_workarounds_init(gt, wal);
@@ -1850,22 +1775,6 @@ void intel_gt_init_workarounds(struct intel_gt *gt)
wa_init_finish(wal);
}
-static enum forcewake_domains
-wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
-{
- enum forcewake_domains fw = 0;
- struct i915_wa *wa;
- unsigned int i;
-
- for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- fw |= intel_uncore_forcewake_for_reg(uncore,
- wa->reg,
- FW_REG_READ |
- FW_REG_WRITE);
-
- return fw;
-}
-
static bool
wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
const char *name, const char *from)
@@ -2229,29 +2138,10 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine)
switch (engine->class) {
case RENDER_CLASS:
- /*
- * Wa_1507100340:dg2_g10
- *
- * This covers 4 registers which are next to one another :
- * - PS_INVOCATION_COUNT
- * - PS_INVOCATION_COUNT_UDW
- * - PS_DEPTH_COUNT
- * - PS_DEPTH_COUNT_UDW
- */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
- whitelist_reg_ext(w, PS_INVOCATION_COUNT,
- RING_FORCE_TO_NONPRIV_ACCESS_RD |
- RING_FORCE_TO_NONPRIV_RANGE_4);
-
/* Required by recommended tuning setting (not a workaround) */
whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
break;
- case COMPUTE_CLASS:
- /* Wa_16011157294:dg2_g10 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
- whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
- break;
default:
break;
}
@@ -2281,7 +2171,7 @@ static void pvc_whitelist_build(struct intel_engine_cs *engine)
blacklist_trtt(engine);
}
-static void mtl_whitelist_build(struct intel_engine_cs *engine)
+static void xelpg_whitelist_build(struct intel_engine_cs *engine)
{
struct i915_wa_list *w = &engine->whitelist;
@@ -2303,8 +2193,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
wa_init_start(w, engine->gt, "whitelist", engine->name);
- if (IS_METEORLAKE(i915))
- mtl_whitelist_build(engine);
+ if (engine->gt->type == GT_MEDIA)
+ ; /* none yet */
+ else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
+ xelpg_whitelist_build(engine);
else if (IS_PONTEVECCHIO(i915))
pvc_whitelist_build(engine);
else if (IS_DG2(i915))
@@ -2402,62 +2294,35 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
}
}
-static bool needs_wa_1308578152(struct intel_engine_cs *engine)
-{
- return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >=
- GEN_DSS_PER_GSLICE;
-}
-
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_22014600077 */
wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
ENABLE_EU_COUNT_FOR_TDL_FLUSH);
}
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
/* Wa_1509727124 */
wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
SC_DISABLE_POWER_OPTIMIZATION_EBB);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915) ||
- IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
/* Wa_22012856258 */
wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
GEN12_DISABLE_READ_SUPPRESSION);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14013392000:dg2_g11 */
- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14012419201:dg2 */
- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
- }
-
- /* Wa_1308578152:dg2_g10 when first gslice is fused off */
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) &&
- needs_wa_1308578152(engine)) {
- wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
- GEN12_REPLAY_MODE_GRANULARITY);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ if (IS_DG2(i915)) {
/*
* Wa_22010960976:dg2
* Wa_14013347512:dg2
@@ -2466,34 +2331,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
- /*
- * Wa_1608949956:dg2_g10
- * Wa_14010198302:dg2_g10
- */
- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
- MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) ||
+ IS_DG2(i915)) {
+ /* Wa_14015150844 */
+ wa_mcr_add(wal, XEHP_HDC_CHICKEN0, 0,
+ _MASKED_BIT_ENABLE(DIS_ATOMIC_CHAINING_TYPED_WRITES),
+ 0, true);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0))
- /* Wa_22010430635:dg2 */
- wa_mcr_masked_en(wal,
- GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_GRF_CLEAR);
-
- /* Wa_14013202645:dg2 */
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
- wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
-
- /* Wa_22012532006:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
- wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
-
- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G10(i915)) {
+ if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
/* Wa_22014600077:dg2 */
wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
_MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
@@ -2501,6 +2347,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
true);
}
+ if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
+ IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
+ /*
+ * Wa_1606700617:tgl,dg1,adl-p
+ * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
+ * Wa_14010826681:tgl,dg1,rkl,adl-p
+ * Wa_18019627453:dg2
+ */
+ wa_masked_en(wal,
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
+ }
+
if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
@@ -2514,19 +2373,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
*/
wa_write_or(wal, GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
- }
- if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) ||
- IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
- /*
- * Wa_1606700617:tgl,dg1,adl-p
- * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
- * Wa_14010826681:tgl,dg1,rkl,adl-p
- * Wa_18019627453:dg2
- */
- wa_masked_en(wal,
- GEN9_CS_DEBUG_MODE1,
- FF_DOP_CLOCK_GATE_DISABLE);
+ /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
+ wa_mcr_masked_en(wal,
+ GEN10_SAMPLER_MODE,
+ ENABLE_SMALLPL);
}
if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
@@ -2553,14 +2404,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN8_RC_SEMA_IDLE_MSG_DISABLE);
}
- if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
- IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
- /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
- wa_mcr_masked_en(wal,
- GEN10_SAMPLER_MODE,
- ENABLE_SMALLPL);
- }
-
if (GRAPHICS_VER(i915) == 11) {
/* This is not an Wa. Enable for better image quality */
wa_masked_en(wal,
@@ -2933,7 +2776,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
struct drm_i915_private *i915 = engine->i915;
/* WaKBLVECSSemaphoreWaitPoll:kbl */
- if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
+ if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
wa_write(wal,
RING_SEMA_WAIT_POLL(engine->mmio_base),
1);
@@ -2962,10 +2805,12 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* function invoked by __intel_engine_init_ctx_wa().
*/
static void
-add_render_compute_tuning_settings(struct drm_i915_private *i915,
+add_render_compute_tuning_settings(struct intel_gt *gt,
struct i915_wa_list *wal)
{
- if (IS_METEORLAKE(i915) || IS_DG2(i915))
+ struct drm_i915_private *i915 = gt->i915;
+
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
/*
@@ -2994,8 +2839,9 @@ static void
general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
- add_render_compute_tuning_settings(i915, wal);
+ add_render_compute_tuning_settings(gt, wal);
if (GRAPHICS_VER(i915) >= 11) {
/* This is not a Wa (although referred to as
@@ -3016,13 +2862,13 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE);
}
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
/* Wa_14017856879 */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
/*
* Wa_14017066071
* Wa_14017654203
@@ -3030,37 +2876,47 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
MTL_DISABLE_SAMPLER_SC_OOO);
- if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
/* Wa_22015279794 */
wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
DISABLE_PREFETCH_INTO_IC);
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
/* Wa_22013037850 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
DISABLE_128B_EVICTION_COMMAND_UDW);
+
+ /* Wa_18017747507 */
+ wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
}
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
IS_PONTEVECCHIO(i915) ||
IS_DG2(i915)) {
/* Wa_22014226127 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
}
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2(i915)) {
- /* Wa_18017747507 */
- wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
+ if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) {
+ /* Wa_14015227452:dg2,pvc */
+ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+
+ /* Wa_16015675438:dg2,pvc */
+ wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(i915)) {
+ if (IS_DG2(i915)) {
+ /*
+ * Wa_16011620976:dg2_g11
+ * Wa_22015475538:dg2
+ */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+ }
+
+ if (IS_DG2_G11(i915)) {
/*
* Wa_22012826095:dg2
* Wa_22013059131:dg2
@@ -3072,18 +2928,23 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
/* Wa_22013059131:dg2 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
- }
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
/*
- * Wa_14010918519:dg2_g10
+ * Wa_22012654132
*
- * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping,
- * so ignoring verification.
+ * Note that register 0xE420 is write-only and cannot be read
+ * back for verification on DG2 (due to Wa_14012342262), so
+ * we need to explicitly skip the readback.
*/
- wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
- FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
- 0, false);
+ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+ _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+ 0 /* write-only, so skip validation */,
+ true);
+ }
+
+ if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) {
+ /* Wa_18028616096 */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
}
if (IS_XEHPSDV(i915)) {
@@ -3101,35 +2962,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
}
-
- if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
- /* Wa_14015227452:dg2,pvc */
- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
-
- /* Wa_16015675438:dg2,pvc */
- wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
- }
-
- if (IS_DG2(i915)) {
- /*
- * Wa_16011620976:dg2_g11
- * Wa_22015475538:dg2
- */
- wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915))
- /*
- * Wa_22012654132
- *
- * Note that register 0xE420 is write-only and cannot be read
- * back for verification on DG2 (due to Wa_14012342262), so
- * we need to explicitly skip the readback.
- */
- wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
- _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
- 0 /* write-only, so skip validation */,
- true);
}
static void
@@ -3237,7 +3069,7 @@ wa_list_srm(struct i915_request *rq,
const struct i915_wa_list *wal,
struct i915_vma *vma)
{
- struct drm_i915_private *i915 = rq->engine->i915;
+ struct drm_i915_private *i915 = rq->i915;
unsigned int i, count = 0;
const struct i915_wa *wa;
u32 srm, *cs;
@@ -3336,7 +3168,7 @@ retry:
err = 0;
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
+ if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
continue;
if (!wa_verify(wal->gt, wa, results[i], wal->name, from))
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 76fbae358072..47070cba7eb1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -88,8 +88,9 @@ static int __live_context_size(struct intel_engine_cs *engine)
goto err;
vaddr = i915_gem_object_pin_map_unlocked(ce->state->obj,
- i915_coherent_map_type(engine->i915,
- ce->state->obj, false));
+ intel_gt_coherent_map_type(engine->gt,
+ ce->state->obj,
+ false));
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
intel_context_unpin(ce);
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 78cdfc6f315f..86cecf7a1105 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -62,7 +62,7 @@ static int write_timestamp(struct i915_request *rq, int slot)
return PTR_ERR(cs);
cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
- if (GRAPHICS_VER(rq->engine->i915) >= 8)
+ if (GRAPHICS_VER(rq->i915) >= 8)
cmd++;
*cs++ = cmd;
*cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine));
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 8b0d84f2aad2..0dd4d00ee894 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -73,7 +73,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt)
h->seqno = memset(vaddr, 0xff, PAGE_SIZE);
vaddr = i915_gem_object_pin_map_unlocked(h->obj,
- i915_coherent_map_type(gt->i915, h->obj, false));
+ intel_gt_coherent_map_type(gt, h->obj, false));
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto err_unpin_hws;
@@ -119,7 +119,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
return ERR_CAST(obj);
}
- vaddr = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(gt->i915, obj, false));
+ vaddr = i915_gem_object_pin_map_unlocked(obj, intel_gt_coherent_map_type(gt, obj, false));
if (IS_ERR(vaddr)) {
i915_gem_object_put(obj);
i915_vm_put(vm);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index a78a3d2c2e16..5f826b6dcf5d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1292,9 +1292,9 @@ static int compare_isolation(struct intel_engine_cs *engine,
}
lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
- i915_coherent_map_type(engine->i915,
- ce->state->obj,
- false));
+ intel_gt_coherent_map_type(engine->gt,
+ ce->state->obj,
+ false));
if (IS_ERR(lrc)) {
err = PTR_ERR(lrc);
goto err_B1;
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index 3def5ca72dec..1a34cbe04fb6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -710,7 +710,7 @@ static int threaded_migrate(struct intel_migrate *migrate,
thread[i].tsk = tsk;
}
- msleep(10); /* start all threads before we kthread_stop() */
+ msleep(10 * n_cpus); /* start all threads before we kthread_stop() */
for (i = 0; i < n_cpus; ++i) {
struct task_struct *tsk = thread[i].tsk;
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index a8446ab82501..d73e438fb85f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -137,7 +137,7 @@ static int read_mocs_table(struct i915_request *rq,
if (!table)
return 0;
- if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
+ if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915))
addr = global_mocs_offset() + gt->uncore->gsi_offset;
else
addr = mocs_offset(rq->engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index 2ceeadecc639..a7189c2d660c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -140,7 +140,7 @@ static const u32 *__live_rc6_ctx(struct intel_context *ce)
}
cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
- if (GRAPHICS_VER(rq->engine->i915) >= 8)
+ if (GRAPHICS_VER(rq->i915) >= 8)
cmd++;
*cs++ = cmd;
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 39c3ec12df1a..fa36cf920bde 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -459,12 +459,12 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
if (IS_ERR(cs))
return PTR_ERR(cs);
- if (GRAPHICS_VER(rq->engine->i915) >= 8) {
+ if (GRAPHICS_VER(rq->i915) >= 8) {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = addr;
*cs++ = 0;
*cs++ = value;
- } else if (GRAPHICS_VER(rq->engine->i915) >= 4) {
+ } else if (GRAPHICS_VER(rq->i915) >= 4) {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = 0;
*cs++ = addr;
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
index 3bd6b540257b..7e41f69fc818 100644
--- a/drivers/gpu/drm/i915/gt/selftest_tlb.c
+++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c
@@ -6,6 +6,7 @@
#include "i915_selftest.h"
#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gen8_engine_cs.h"
@@ -354,7 +355,7 @@ out_a:
static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
{
- intel_gt_invalidate_tlb(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
+ intel_gt_invalidate_tlb_full(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
}
static int invalidate_full(void *arg)
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index 449c9ed44382..bccc3a1200bc 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -33,7 +33,6 @@ struct file *shmem_create_from_data(const char *name, void *data, size_t len)
struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
enum i915_map_type map_type;
struct file *file;
void *ptr;
@@ -44,7 +43,7 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
return file;
}
- map_type = i915_coherent_map_type(i915, obj, true);
+ map_type = i915_gem_object_is_lmem(obj) ? I915_MAP_WC : I915_MAP_WB;
ptr = i915_gem_object_pin_map_unlocked(obj, map_type);
if (IS_ERR(ptr))
return ERR_CAST(ptr);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h
index 714f0c256118..6d009a905269 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h
@@ -8,6 +8,74 @@
#include <linux/types.h>
+struct intel_gsc_version {
+ u16 major;
+ u16 minor;
+ u16 hotfix;
+ u16 build;
+} __packed;
+
+struct intel_gsc_partition {
+ u32 offset;
+ u32 size;
+} __packed;
+
+struct intel_gsc_layout_pointers {
+ u8 rom_bypass_vector[16];
+
+ /* size of pointers layout not including ROM bypass vector */
+ u16 size;
+
+ /*
+ * bit0: Backup copy of layout pointers exist
+ * bits1-15: reserved
+ */
+ u8 flags;
+
+ u8 reserved;
+
+ u32 crc32;
+
+ struct intel_gsc_partition datap;
+ struct intel_gsc_partition boot1;
+ struct intel_gsc_partition boot2;
+ struct intel_gsc_partition boot3;
+ struct intel_gsc_partition boot4;
+ struct intel_gsc_partition boot5;
+ struct intel_gsc_partition temp_pages;
+} __packed;
+
+/* Boot partition structures */
+struct intel_gsc_bpdt_header {
+ u32 signature;
+#define INTEL_GSC_BPDT_HEADER_SIGNATURE 0x000055AA
+
+ u16 descriptor_count; /* num of entries after the header */
+
+ u8 version;
+ u8 configuration;
+
+ u32 crc32;
+
+ u32 build_version;
+ struct intel_gsc_version tool_version;
+} __packed;
+
+struct intel_gsc_bpdt_entry {
+ /*
+ * Bits 0-15: BPDT entry type
+ * Bits 16-17: reserved
+ * Bit 18: code sub-partition
+ * Bits 19-31: reserved
+ */
+ u32 type;
+#define INTEL_GSC_BPDT_ENTRY_TYPE_MASK GENMASK(15, 0)
+#define INTEL_GSC_BPDT_ENTRY_TYPE_GSC_RBE 0x1
+
+ u32 sub_partition_offset; /* from the base of the BPDT header */
+ u32 sub_partition_size;
+} __packed;
+
/* Code partition directory (CPD) structures */
struct intel_gsc_cpd_header_v2 {
u32 header_marker;
@@ -44,13 +112,6 @@ struct intel_gsc_cpd_entry {
u8 reserved[4];
} __packed;
-struct intel_gsc_version {
- u16 major;
- u16 minor;
- u16 hotfix;
- u16 build;
-} __packed;
-
struct intel_gsc_manifest_header {
u32 header_type; /* 0x4 for manifest type */
u32 header_length; /* in dwords */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
index 60e9c6c9e775..e2e42b3e0d5d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
@@ -3,48 +3,216 @@
* Copyright © 2022 Intel Corporation
*/
+#include "gem/i915_gem_lmem.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
#include "gt/intel_ring.h"
+#include "intel_gsc_binary_headers.h"
#include "intel_gsc_fw.h"
-
-#define GSC_FW_STATUS_REG _MMIO(0x116C40)
-#define GSC_FW_CURRENT_STATE REG_GENMASK(3, 0)
-#define GSC_FW_CURRENT_STATE_RESET 0
-#define GSC_FW_PROXY_STATE_NORMAL 5
-#define GSC_FW_INIT_COMPLETE_BIT REG_BIT(9)
+#include "intel_gsc_uc_heci_cmd_submit.h"
+#include "i915_reg.h"
static bool gsc_is_in_reset(struct intel_uncore *uncore)
{
- u32 fw_status = intel_uncore_read(uncore, GSC_FW_STATUS_REG);
+ u32 fw_status = intel_uncore_read(uncore, HECI_FWSTS(MTL_GSC_HECI1_BASE, 1));
- return REG_FIELD_GET(GSC_FW_CURRENT_STATE, fw_status) ==
- GSC_FW_CURRENT_STATE_RESET;
+ return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, fw_status) ==
+ HECI1_FWSTS1_CURRENT_STATE_RESET;
}
-static u32 gsc_uc_get_fw_status(struct intel_uncore *uncore)
+static u32 gsc_uc_get_fw_status(struct intel_uncore *uncore, bool needs_wakeref)
{
intel_wakeref_t wakeref;
u32 fw_status = 0;
- with_intel_runtime_pm(uncore->rpm, wakeref)
- fw_status = intel_uncore_read(uncore, GSC_FW_STATUS_REG);
+ if (needs_wakeref)
+ wakeref = intel_runtime_pm_get(uncore->rpm);
+
+ fw_status = intel_uncore_read(uncore, HECI_FWSTS(MTL_GSC_HECI1_BASE, 1));
+ if (needs_wakeref)
+ intel_runtime_pm_put(uncore->rpm, wakeref);
return fw_status;
}
-bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc)
+bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc, bool needs_wakeref)
+{
+ return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE,
+ gsc_uc_get_fw_status(gsc_uc_to_gt(gsc)->uncore,
+ needs_wakeref)) ==
+ HECI1_FWSTS1_PROXY_STATE_NORMAL;
+}
+
+int intel_gsc_uc_fw_proxy_get_status(struct intel_gsc_uc *gsc)
{
- return REG_FIELD_GET(GSC_FW_CURRENT_STATE,
- gsc_uc_get_fw_status(gsc_uc_to_gt(gsc)->uncore)) ==
- GSC_FW_PROXY_STATE_NORMAL;
+ if (!(IS_ENABLED(CONFIG_INTEL_MEI_GSC_PROXY)))
+ return -ENODEV;
+ if (!intel_uc_fw_is_loadable(&gsc->fw))
+ return -ENODEV;
+ if (__intel_uc_fw_status(&gsc->fw) == INTEL_UC_FIRMWARE_LOAD_FAIL)
+ return -ENOLINK;
+ if (!intel_gsc_uc_fw_proxy_init_done(gsc, true))
+ return -EAGAIN;
+
+ return 0;
}
bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc)
{
- return gsc_uc_get_fw_status(gsc_uc_to_gt(gsc)->uncore) & GSC_FW_INIT_COMPLETE_BIT;
+ return gsc_uc_get_fw_status(gsc_uc_to_gt(gsc)->uncore, false) &
+ HECI1_FWSTS1_INIT_COMPLETE;
+}
+
+static inline u32 cpd_entry_offset(const struct intel_gsc_cpd_entry *entry)
+{
+ return entry->offset & INTEL_GSC_CPD_ENTRY_OFFSET_MASK;
+}
+
+int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, size_t size)
+{
+ struct intel_gsc_uc *gsc = container_of(gsc_fw, struct intel_gsc_uc, fw);
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ const struct intel_gsc_layout_pointers *layout = data;
+ const struct intel_gsc_bpdt_header *bpdt_header = NULL;
+ const struct intel_gsc_bpdt_entry *bpdt_entry = NULL;
+ const struct intel_gsc_cpd_header_v2 *cpd_header = NULL;
+ const struct intel_gsc_cpd_entry *cpd_entry = NULL;
+ const struct intel_gsc_manifest_header *manifest;
+ size_t min_size = sizeof(*layout);
+ int i;
+
+ if (size < min_size) {
+ gt_err(gt, "GSC FW too small! %zu < %zu\n", size, min_size);
+ return -ENODATA;
+ }
+
+ /*
+ * The GSC binary starts with the pointer layout, which contains the
+ * locations of the various partitions of the binary. The one we're
+ * interested in to get the version is the boot1 partition, where we can
+ * find a BPDT header followed by entries, one of which points to the
+ * RBE sub-section of the partition. From here, we can parse the CPD
+ * header and the following entries to find the manifest location
+ * (entry identified by the "RBEP.man" name), from which we can finally
+ * extract the version.
+ *
+ * --------------------------------------------------
+ * [ intel_gsc_layout_pointers ]
+ * [ ... ]
+ * [ boot1.offset >---------------------------]------o
+ * [ ... ] |
+ * -------------------------------------------------- |
+ * |
+ * -------------------------------------------------- |
+ * [ intel_gsc_bpdt_header ]<-----o
+ * --------------------------------------------------
+ * [ intel_gsc_bpdt_entry[] ]
+ * [ entry1 ]
+ * [ ... ]
+ * [ entryX ]
+ * [ type == GSC_RBE ]
+ * [ offset >-----------------------------]------o
+ * [ ... ] |
+ * -------------------------------------------------- |
+ * |
+ * -------------------------------------------------- |
+ * [ intel_gsc_cpd_header_v2 ]<-----o
+ * --------------------------------------------------
+ * [ intel_gsc_cpd_entry[] ]
+ * [ entry1 ]
+ * [ ... ]
+ * [ entryX ]
+ * [ "RBEP.man" ]
+ * [ ... ]
+ * [ offset >----------------------------]------o
+ * [ ... ] |
+ * -------------------------------------------------- |
+ * |
+ * -------------------------------------------------- |
+ * [ intel_gsc_manifest_header ]<-----o
+ * [ ... ]
+ * [ intel_gsc_version fw_version ]
+ * [ ... ]
+ * --------------------------------------------------
+ */
+
+ min_size = layout->boot1.offset + layout->boot1.size;
+ if (size < min_size) {
+ gt_err(gt, "GSC FW too small for boot section! %zu < %zu\n",
+ size, min_size);
+ return -ENODATA;
+ }
+
+ min_size = sizeof(*bpdt_header);
+ if (layout->boot1.size < min_size) {
+ gt_err(gt, "GSC FW boot section too small for BPDT header: %u < %zu\n",
+ layout->boot1.size, min_size);
+ return -ENODATA;
+ }
+
+ bpdt_header = data + layout->boot1.offset;
+ if (bpdt_header->signature != INTEL_GSC_BPDT_HEADER_SIGNATURE) {
+ gt_err(gt, "invalid signature for BPDT header: 0x%08x!\n",
+ bpdt_header->signature);
+ return -EINVAL;
+ }
+
+ min_size += sizeof(*bpdt_entry) * bpdt_header->descriptor_count;
+ if (layout->boot1.size < min_size) {
+ gt_err(gt, "GSC FW boot section too small for BPDT entries: %u < %zu\n",
+ layout->boot1.size, min_size);
+ return -ENODATA;
+ }
+
+ bpdt_entry = (void *)bpdt_header + sizeof(*bpdt_header);
+ for (i = 0; i < bpdt_header->descriptor_count; i++, bpdt_entry++) {
+ if ((bpdt_entry->type & INTEL_GSC_BPDT_ENTRY_TYPE_MASK) !=
+ INTEL_GSC_BPDT_ENTRY_TYPE_GSC_RBE)
+ continue;
+
+ cpd_header = (void *)bpdt_header + bpdt_entry->sub_partition_offset;
+ min_size = bpdt_entry->sub_partition_offset + sizeof(*cpd_header);
+ break;
+ }
+
+ if (!cpd_header) {
+ gt_err(gt, "couldn't find CPD header in GSC binary!\n");
+ return -ENODATA;
+ }
+
+ if (layout->boot1.size < min_size) {
+ gt_err(gt, "GSC FW boot section too small for CPD header: %u < %zu\n",
+ layout->boot1.size, min_size);
+ return -ENODATA;
+ }
+
+ if (cpd_header->header_marker != INTEL_GSC_CPD_HEADER_MARKER) {
+ gt_err(gt, "invalid marker for CPD header in GSC bin: 0x%08x!\n",
+ cpd_header->header_marker);
+ return -EINVAL;
+ }
+
+ min_size += sizeof(*cpd_entry) * cpd_header->num_of_entries;
+ if (layout->boot1.size < min_size) {
+ gt_err(gt, "GSC FW boot section too small for CPD entries: %u < %zu\n",
+ layout->boot1.size, min_size);
+ return -ENODATA;
+ }
+
+ cpd_entry = (void *)cpd_header + cpd_header->header_length;
+ for (i = 0; i < cpd_header->num_of_entries; i++, cpd_entry++) {
+ if (strcmp(cpd_entry->name, "RBEP.man") == 0) {
+ manifest = (void *)cpd_header + cpd_entry_offset(cpd_entry);
+ intel_uc_fw_version_from_gsc_manifest(&gsc->release,
+ manifest);
+ gsc->security_version = manifest->security_version;
+ break;
+ }
+ }
+
+ return 0;
}
static int emit_gsc_fw_load(struct i915_request *rq, struct intel_gsc_uc *gsc)
@@ -114,48 +282,25 @@ out_rq:
static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc)
{
struct intel_gt *gt = gsc_uc_to_gt(gsc);
- struct drm_i915_private *i915 = gt->i915;
- struct drm_i915_gem_object *obj;
- void *src, *dst;
+ void *src;
if (!gsc->local)
return -ENODEV;
- obj = gsc->local->obj;
-
- if (obj->base.size < gsc->fw.size)
+ if (gsc->local->size < gsc->fw.size)
return -ENOSPC;
- /*
- * Wa_22016122933: For MTL the shared memory needs to be mapped
- * as WC on CPU side and UC (PAT index 2) on GPU side
- */
- if (IS_METEORLAKE(i915))
- i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
-
- dst = i915_gem_object_pin_map_unlocked(obj,
- i915_coherent_map_type(i915, obj, true));
- if (IS_ERR(dst))
- return PTR_ERR(dst);
-
src = i915_gem_object_pin_map_unlocked(gsc->fw.obj,
- i915_coherent_map_type(i915, gsc->fw.obj, true));
- if (IS_ERR(src)) {
- i915_gem_object_unpin_map(obj);
+ intel_gt_coherent_map_type(gt, gsc->fw.obj, true));
+ if (IS_ERR(src))
return PTR_ERR(src);
- }
- memset(dst, 0, obj->base.size);
- memcpy(dst, src, gsc->fw.size);
+ memcpy_toio(gsc->local_vaddr, src, gsc->fw.size);
+ memset_io(gsc->local_vaddr + gsc->fw.size, 0, gsc->local->size - gsc->fw.size);
- /*
- * Wa_22016122933: Making sure the data in dst is
- * visible to GSC right away
- */
intel_guc_write_barrier(&gt->uc.guc);
i915_gem_object_unpin_map(gsc->fw.obj);
- i915_gem_object_unpin_map(obj);
return 0;
}
@@ -163,12 +308,94 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc)
static int gsc_fw_wait(struct intel_gt *gt)
{
return intel_wait_for_register(gt->uncore,
- GSC_FW_STATUS_REG,
- GSC_FW_INIT_COMPLETE_BIT,
- GSC_FW_INIT_COMPLETE_BIT,
+ HECI_FWSTS(MTL_GSC_HECI1_BASE, 1),
+ HECI1_FWSTS1_INIT_COMPLETE,
+ HECI1_FWSTS1_INIT_COMPLETE,
500);
}
+struct intel_gsc_mkhi_header {
+ u8 group_id;
+#define MKHI_GROUP_ID_GFX_SRV 0x30
+
+ u8 command;
+#define MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION (0x42)
+
+ u8 reserved;
+ u8 result;
+} __packed;
+
+struct mtl_gsc_ver_msg_in {
+ struct intel_gsc_mtl_header header;
+ struct intel_gsc_mkhi_header mkhi;
+} __packed;
+
+struct mtl_gsc_ver_msg_out {
+ struct intel_gsc_mtl_header header;
+ struct intel_gsc_mkhi_header mkhi;
+ u16 proj_major;
+ u16 compat_major;
+ u16 compat_minor;
+ u16 reserved[5];
+} __packed;
+
+#define GSC_VER_PKT_SZ SZ_4K
+
+static int gsc_fw_query_compatibility_version(struct intel_gsc_uc *gsc)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ struct mtl_gsc_ver_msg_in *msg_in;
+ struct mtl_gsc_ver_msg_out *msg_out;
+ struct i915_vma *vma;
+ u64 offset;
+ void *vaddr;
+ int err;
+
+ err = intel_guc_allocate_and_map_vma(&gt->uc.guc, GSC_VER_PKT_SZ * 2,
+ &vma, &vaddr);
+ if (err) {
+ gt_err(gt, "failed to allocate vma for GSC version query\n");
+ return err;
+ }
+
+ offset = i915_ggtt_offset(vma);
+ msg_in = vaddr;
+ msg_out = vaddr + GSC_VER_PKT_SZ;
+
+ intel_gsc_uc_heci_cmd_emit_mtl_header(&msg_in->header,
+ HECI_MEADDRESS_MKHI,
+ sizeof(*msg_in), 0);
+ msg_in->mkhi.group_id = MKHI_GROUP_ID_GFX_SRV;
+ msg_in->mkhi.command = MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION;
+
+ err = intel_gsc_uc_heci_cmd_submit_packet(&gt->uc.gsc,
+ offset,
+ sizeof(*msg_in),
+ offset + GSC_VER_PKT_SZ,
+ GSC_VER_PKT_SZ);
+ if (err) {
+ gt_err(gt,
+ "failed to submit GSC request for compatibility version: %d\n",
+ err);
+ goto out_vma;
+ }
+
+ if (msg_out->header.message_size != sizeof(*msg_out)) {
+ gt_err(gt, "invalid GSC reply length %u [expected %zu], s=0x%x, f=0x%x, r=0x%x\n",
+ msg_out->header.message_size, sizeof(*msg_out),
+ msg_out->header.status, msg_out->header.flags, msg_out->mkhi.result);
+ err = -EPROTO;
+ goto out_vma;
+ }
+
+ gsc->fw.file_selected.ver.major = msg_out->compat_major;
+ gsc->fw.file_selected.ver.minor = msg_out->compat_minor;
+
+out_vma:
+ i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP);
+ return err;
+}
+
int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc)
{
struct intel_gt *gt = gsc_uc_to_gt(gsc);
@@ -226,10 +453,24 @@ int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc)
if (err)
goto fail;
+ err = gsc_fw_query_compatibility_version(gsc);
+ if (err)
+ goto fail;
+
+ /* we only support compatibility version 1.0 at the moment */
+ err = intel_uc_check_file_version(gsc_fw, NULL);
+ if (err)
+ goto fail;
+
/* FW is not fully operational until we enable SW proxy */
intel_uc_fw_change_status(gsc_fw, INTEL_UC_FIRMWARE_TRANSFERRED);
- gt_info(gt, "Loaded GSC firmware %s\n", gsc_fw->file_selected.path);
+ gt_info(gt, "Loaded GSC firmware %s (cv%u.%u, r%u.%u.%u.%u, svn %u)\n",
+ gsc_fw->file_selected.path,
+ gsc_fw->file_selected.ver.major, gsc_fw->file_selected.ver.minor,
+ gsc->release.major, gsc->release.minor,
+ gsc->release.patch, gsc->release.build,
+ gsc->security_version);
return 0;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h
index fff8928218df..bc9dd0de8aaf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h
@@ -9,10 +9,13 @@
#include <linux/types.h>
struct intel_gsc_uc;
+struct intel_uc_fw;
struct intel_uncore;
+int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, size_t size);
int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc);
bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc);
-bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc);
+bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc, bool needs_wakeref);
+int intel_gsc_uc_fw_proxy_get_status(struct intel_gsc_uc *gsc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
index c659cc01f32f..0d3b22a74365 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
@@ -7,10 +7,11 @@
#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
-#include "intel_gsc_uc.h"
#include "intel_gsc_fw.h"
-#include "i915_drv.h"
#include "intel_gsc_proxy.h"
+#include "intel_gsc_uc.h"
+#include "i915_drv.h"
+#include "i915_reg.h"
static void gsc_work(struct work_struct *work)
{
@@ -61,8 +62,18 @@ static void gsc_work(struct work_struct *work)
}
ret = intel_gsc_proxy_request_handler(gsc);
- if (ret)
+ if (ret) {
+ if (actions & GSC_ACTION_FW_LOAD) {
+ /*
+ * A proxy failure right after firmware load means the proxy-init
+ * step has failed so mark GSC as not usable after this
+ */
+ drm_err(&gt->i915->drm,
+ "GSC proxy handler failed to init\n");
+ intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
+ }
goto out_put;
+ }
/* mark the GSC FW init as done the first time we run this */
if (actions & GSC_ACTION_FW_LOAD) {
@@ -71,12 +82,13 @@ static void gsc_work(struct work_struct *work)
* complete the request handling cleanly, so we need to check the
* status register to check if the proxy init was actually successful
*/
- if (intel_gsc_uc_fw_proxy_init_done(gsc)) {
+ if (intel_gsc_uc_fw_proxy_init_done(gsc, false)) {
drm_dbg(&gt->i915->drm, "GSC Proxy initialized\n");
intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_RUNNING);
} else {
drm_err(&gt->i915->drm,
"GSC status reports proxy init not complete\n");
+ intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
}
}
}
@@ -98,7 +110,7 @@ static bool gsc_engine_supported(struct intel_gt *gt)
GEM_BUG_ON(!gt_is_root(gt) && !gt->info.engine_mask);
if (gt_is_root(gt))
- mask = RUNTIME_INFO(gt->i915)->platform_engine_mask;
+ mask = INTEL_INFO(gt->i915)->platform_engine_mask;
else
mask = gt->info.engine_mask;
@@ -133,26 +145,85 @@ void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc)
}
}
+static int gsc_allocate_and_map_vma(struct intel_gsc_uc *gsc, u32 size)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ void __iomem *vaddr;
+ int ret = 0;
+
+ /*
+ * The GSC FW doesn't immediately suspend after becoming idle, so there
+ * is a chance that it could still be awake after we successfully
+ * return from the pci suspend function, even if there are no pending
+ * operations.
+ * The FW might therefore try to access memory for its suspend operation
+ * after the kernel has completed the HW suspend flow; this can cause
+ * issues if the FW is mapped in normal RAM memory, as some of the
+ * involved HW units might've already lost power.
+ * The driver must therefore avoid this situation and the recommended
+ * way to do so is to use stolen memory for the GSC memory allocation,
+ * because stolen memory takes a different path in HW and it is
+ * guaranteed to always work as long as the GPU itself is awake (which
+ * it must be if the GSC is awake).
+ */
+ obj = i915_gem_object_create_stolen(gt->i915, size);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err;
+ }
+
+ vaddr = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(vaddr)) {
+ ret = PTR_ERR(vaddr);
+ goto err;
+ }
+
+ i915_vma_make_unshrinkable(vma);
+
+ gsc->local = vma;
+ gsc->local_vaddr = vaddr;
+
+ return 0;
+
+err:
+ i915_gem_object_put(obj);
+ return ret;
+}
+
+static void gsc_unmap_and_free_vma(struct intel_gsc_uc *gsc)
+{
+ struct i915_vma *vma = fetch_and_zero(&gsc->local);
+
+ if (!vma)
+ return;
+
+ gsc->local_vaddr = NULL;
+ i915_vma_unpin_iomap(vma);
+ i915_gem_object_put(vma->obj);
+}
+
int intel_gsc_uc_init(struct intel_gsc_uc *gsc)
{
static struct lock_class_key gsc_lock;
struct intel_gt *gt = gsc_uc_to_gt(gsc);
struct intel_engine_cs *engine = gt->engine[GSC0];
struct intel_context *ce;
- struct i915_vma *vma;
int err;
err = intel_uc_fw_init(&gsc->fw);
if (err)
goto out;
- vma = intel_guc_allocate_vma(&gt->uc.guc, SZ_8M);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
+ err = gsc_allocate_and_map_vma(gsc, SZ_4M);
+ if (err)
goto out_fw;
- }
-
- gsc->local = vma;
ce = intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K,
I915_GEM_HWS_GSC_ADDR,
@@ -173,7 +244,7 @@ int intel_gsc_uc_init(struct intel_gsc_uc *gsc)
return 0;
out_vma:
- i915_vma_unpin_and_release(&gsc->local, 0);
+ gsc_unmap_and_free_vma(gsc);
out_fw:
intel_uc_fw_fini(&gsc->fw);
out:
@@ -197,7 +268,7 @@ void intel_gsc_uc_fini(struct intel_gsc_uc *gsc)
if (gsc->ce)
intel_engine_destroy_pinned_context(fetch_and_zero(&gsc->ce));
- i915_vma_unpin_and_release(&gsc->local, 0);
+ gsc_unmap_and_free_vma(gsc);
intel_uc_fw_fini(&gsc->fw);
}
@@ -245,3 +316,45 @@ void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc)
queue_work(gsc->wq, &gsc->work);
}
+
+void intel_gsc_uc_load_status(struct intel_gsc_uc *gsc, struct drm_printer *p)
+{
+ struct intel_gt *gt = gsc_uc_to_gt(gsc);
+ struct intel_uncore *uncore = gt->uncore;
+ intel_wakeref_t wakeref;
+
+ if (!intel_gsc_uc_is_supported(gsc)) {
+ drm_printf(p, "GSC not supported\n");
+ return;
+ }
+
+ if (!intel_gsc_uc_is_wanted(gsc)) {
+ drm_printf(p, "GSC disabled\n");
+ return;
+ }
+
+ drm_printf(p, "GSC firmware: %s\n", gsc->fw.file_selected.path);
+ if (gsc->fw.file_selected.path != gsc->fw.file_wanted.path)
+ drm_printf(p, "GSC firmware wanted: %s\n", gsc->fw.file_wanted.path);
+ drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(gsc->fw.status));
+
+ drm_printf(p, "Release: %u.%u.%u.%u\n",
+ gsc->release.major, gsc->release.minor,
+ gsc->release.patch, gsc->release.build);
+
+ drm_printf(p, "Compatibility Version: %u.%u [min expected %u.%u]\n",
+ gsc->fw.file_selected.ver.major, gsc->fw.file_selected.ver.minor,
+ gsc->fw.file_wanted.ver.major, gsc->fw.file_wanted.ver.minor);
+
+ drm_printf(p, "SVN: %u\n", gsc->security_version);
+
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
+ u32 i;
+
+ for (i = 1; i <= 6; i++) {
+ u32 status = intel_uncore_read(uncore,
+ HECI_FWSTS(MTL_GSC_HECI1_BASE, i));
+ drm_printf(p, "HECI1 FWSTST%u = 0x%08x\n", i, status);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h
index a2a0813b8a76..c8082cf200fc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h
@@ -8,6 +8,7 @@
#include "intel_uc_fw.h"
+struct drm_printer;
struct i915_vma;
struct intel_context;
struct i915_gsc_proxy_component;
@@ -17,7 +18,26 @@ struct intel_gsc_uc {
struct intel_uc_fw fw;
/* GSC-specific additions */
+
+ /*
+ * The GSC has 3 version numbers:
+ * - Release version (incremented with each build)
+ * - Security version (incremented on security fix)
+ * - Compatibility version (incremented on interface change)
+ *
+ * The one we care about to use the binary is the last one, so that's
+ * the one we save inside the intel_uc_fw structure. The other two
+ * versions are only used for debug/info purposes, so we save them here.
+ *
+ * Note that the release and security versions are available in the
+ * binary header, while the compatibility version must be queried after
+ * loading the binary.
+ */
+ struct intel_uc_fw_ver release;
+ u32 security_version;
+
struct i915_vma *local; /* private memory for GSC usage */
+ void __iomem *local_vaddr; /* pointer to access the private memory */
struct intel_context *ce; /* for submission to GSC FW via GSC engine */
/* for delayed load and proxy handling */
@@ -44,6 +64,7 @@ void intel_gsc_uc_suspend(struct intel_gsc_uc *gsc);
void intel_gsc_uc_resume(struct intel_gsc_uc *gsc);
void intel_gsc_uc_flush_work(struct intel_gsc_uc *gsc);
void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc);
+void intel_gsc_uc_load_status(struct intel_gsc_uc *gsc, struct drm_printer *p);
static inline bool intel_gsc_uc_is_supported(struct intel_gsc_uc *gsc)
{
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c
new file mode 100644
index 000000000000..5baacd822a1c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_debugfs.h"
+#include "gt/intel_gt_print.h"
+#include "intel_gsc_uc.h"
+#include "intel_gsc_uc_debugfs.h"
+#include "i915_drv.h"
+
+static int gsc_info_show(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct intel_gsc_uc *gsc = m->private;
+
+ if (!intel_gsc_uc_is_supported(gsc))
+ return -ENODEV;
+
+ intel_gsc_uc_load_status(gsc, &p);
+
+ return 0;
+}
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(gsc_info);
+
+void intel_gsc_uc_debugfs_register(struct intel_gsc_uc *gsc_uc, struct dentry *root)
+{
+ static const struct intel_gt_debugfs_file files[] = {
+ { "gsc_info", &gsc_info_fops, NULL },
+ };
+
+ if (!intel_gsc_uc_is_supported(gsc_uc))
+ return;
+
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gsc_uc);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h
new file mode 100644
index 000000000000..3415ad39aabb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GSC_UC_H
+#define DEBUGFS_GSC_UC_H
+
+struct intel_gsc_uc;
+struct dentry;
+
+void intel_gsc_uc_debugfs_register(struct intel_gsc_uc *gsc, struct dentry *root);
+
+#endif /* DEBUGFS_GSC_UC_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
index 89ed5ee9cded..2fde5c360cff 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
@@ -81,8 +81,17 @@ out_rq:
i915_request_add(rq);
- if (!err && i915_request_wait(rq, 0, msecs_to_jiffies(500)) < 0)
- err = -ETIME;
+ if (!err) {
+ /*
+ * Start timeout for i915_request_wait only after considering one possible
+ * pending GSC-HECI submission cycle on the other (non-privileged) path.
+ */
+ if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS))
+ drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm,
+ "Delay in gsc-heci-priv submission to gsccs-hw");
+ if (i915_request_wait(rq, 0, msecs_to_jiffies(GSC_HECI_REPLY_LATENCY_MS)) < 0)
+ err = -ETIME;
+ }
i915_request_put(rq);
@@ -186,6 +195,13 @@ out_rq:
i915_request_add(rq);
if (!err) {
+ /*
+ * Start timeout for i915_request_wait only after considering one possible
+ * pending GSC-HECI submission cycle on the other (privileged) path.
+ */
+ if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS))
+ drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm,
+ "Delay in gsc-heci-non-priv submission to gsccs-hw");
if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
msecs_to_jiffies(timeout_ms)) < 0)
err = -ETIME;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h
index ef70e304904a..c4308291c003 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h
@@ -12,11 +12,18 @@ struct i915_vma;
struct intel_context;
struct intel_gsc_uc;
+#define GSC_HECI_REPLY_LATENCY_MS 500
+/*
+ * Max FW response time is 500ms, but this should be counted from the time the
+ * command has hit the GSC-CS hardware, not the preceding handoff to GuC CTB.
+ */
+
struct intel_gsc_mtl_header {
u32 validity_marker;
#define GSC_HECI_VALIDITY_MARKER 0xA578875A
u8 heci_client_id;
+#define HECI_MEADDRESS_MKHI 7
#define HECI_MEADDRESS_PROXY 10
#define HECI_MEADDRESS_PXP 17
#define HECI_MEADDRESS_HDCP 18
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 2eb891b270ae..27df41c53b89 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -159,6 +159,21 @@ static void gen11_disable_guc_interrupts(struct intel_guc *guc)
gen11_reset_guc_interrupts(guc);
}
+static void guc_dead_worker_func(struct work_struct *w)
+{
+ struct intel_guc *guc = container_of(w, struct intel_guc, dead_guc_worker);
+ struct intel_gt *gt = guc_to_gt(guc);
+ unsigned long last = guc->last_dead_guc_jiffies;
+ unsigned long delta = jiffies_to_msecs(jiffies - last);
+
+ if (delta < 500) {
+ intel_gt_set_wedged(gt);
+ } else {
+ intel_gt_handle_error(gt, ALL_ENGINES, I915_ERROR_CAPTURE, "dead GuC");
+ guc->last_dead_guc_jiffies = jiffies;
+ }
+}
+
void intel_guc_init_early(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -171,6 +186,8 @@ void intel_guc_init_early(struct intel_guc *guc)
intel_guc_slpc_init_early(&guc->slpc);
intel_guc_rc_init_early(guc);
+ INIT_WORK(&guc->dead_guc_worker, guc_dead_worker_func);
+
mutex_init(&guc->send_mutex);
spin_lock_init(&guc->irq_lock);
if (GRAPHICS_VER(i915) >= 11) {
@@ -272,18 +289,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50))
flags |= GUC_WA_POLLCS;
- /* Wa_16011759253:dg2_g10:a0 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
- flags |= GUC_WA_GAM_CREDITS;
-
/* Wa_14014475959 */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_DG2(gt->i915))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
/*
- * Wa_14012197797:dg2_g10:a0,dg2_g11:a0
- * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12
+ * Wa_14012197797
+ * Wa_22011391025
*
* The same WA bit is used for both and 22011391025 is applicable to
* all DG2.
@@ -292,22 +305,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
flags |= GUC_WA_DUAL_QUEUE;
/* Wa_22011802037: graphics version 11/12 */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(gt->i915) >= 11 &&
- GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(gt))
flags |= GUC_WA_PRE_PARSER;
- /* Wa_16011777198:dg2 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
- flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
-
/*
- * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..)
- * Wa_22012727685:dg2_g11[a0..)
+ * Wa_22012727170
+ * Wa_22012727685
*/
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER))
+ if (IS_DG2_G11(gt->i915))
flags |= GUC_WA_CONTEXT_ISOLATION;
/* Wa_16015675438 */
@@ -461,6 +466,8 @@ void intel_guc_fini(struct intel_guc *guc)
if (!intel_uc_fw_is_loadable(&guc->fw))
return;
+ flush_work(&guc->dead_guc_worker);
+
if (intel_guc_slpc_is_used(guc))
intel_guc_slpc_fini(&guc->slpc);
@@ -585,6 +592,20 @@ out:
return ret;
}
+int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action)
+{
+ if (action == INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED)
+ guc_err(guc, "Crash dump notification\n");
+ else if (action == INTEL_GUC_ACTION_NOTIFY_EXCEPTION)
+ guc_err(guc, "Exception notification\n");
+ else
+ guc_err(guc, "Unknown crash notification: 0x%04X\n", action);
+
+ queue_work(system_unbound_wq, &guc->dead_guc_worker);
+
+ return 0;
+}
+
int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
const u32 *payload, u32 len)
{
@@ -601,6 +622,9 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
if (msg & INTEL_GUC_RECV_MSG_EXCEPTION)
guc_err(guc, "Received early exception notification!\n");
+ if (msg & (INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED | INTEL_GUC_RECV_MSG_EXCEPTION))
+ queue_work(system_unbound_wq, &guc->dead_guc_worker);
+
return 0;
}
@@ -640,6 +664,8 @@ int intel_guc_suspend(struct intel_guc *guc)
return 0;
if (intel_guc_submission_is_used(guc)) {
+ flush_work(&guc->dead_guc_worker);
+
/*
* This H2G MMIO command tears down the GuC in two steps. First it will
* generate a G2H CTB for every active context indicating a reset. In
@@ -745,10 +771,11 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
return ERR_CAST(obj);
/*
- * Wa_22016122933: For MTL the shared memory needs to be mapped
- * as WC on CPU side and UC (PAT index 2) on GPU side
+ * Wa_22016122933: For Media version 13.0, all Media GT shared
+ * memory needs to be mapped as WC on CPU side and UC (PAT
+ * index 2) on GPU side.
*/
- if (IS_METEORLAKE(gt->i915))
+ if (intel_gt_needs_wa_22016122933(gt))
i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
@@ -792,8 +819,8 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
return PTR_ERR(vma);
vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
- i915_coherent_map_type(guc_to_gt(guc)->i915,
- vma->obj, true));
+ intel_gt_coherent_map_type(guc_to_gt(guc),
+ vma->obj, true));
if (IS_ERR(vaddr)) {
i915_vma_unpin_and_release(&vma, 0);
return PTR_ERR(vaddr);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 8dc291ff0093..6c392bad29c1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -266,6 +266,20 @@ struct intel_guc {
unsigned long last_stat_jiffies;
} timestamp;
+ /**
+ * @dead_guc_worker: Asynchronous worker thread for forcing a GuC reset.
+ * Specifically used when the G2H handler wants to issue a reset. Resets
+ * require flushing the G2H queue. So, the G2H processing itself must not
+ * trigger a reset directly. Instead, go via this worker.
+ */
+ struct work_struct dead_guc_worker;
+ /**
+ * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrance
+ * used to prevent a fundamentally broken system from continuously
+ * reloading the GuC.
+ */
+ unsigned long last_dead_guc_jiffies;
+
#ifdef CONFIG_DRM_I915_SELFTEST
/**
* @number_guc_id_stolen: The number of guc_ids that have been stolen
@@ -476,6 +490,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
int intel_guc_error_capture_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
+int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action);
struct intel_engine_cs *
intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index f28a3a83742d..6e22af31513a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -960,10 +960,6 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
/* now update descriptor */
WRITE_ONCE(desc->head, head);
- /*
- * Wa_22016122933: Making sure the head update is
- * visible to GuC right away
- */
intel_guc_write_barrier(ct_to_guc(ct));
return available - len;
@@ -1116,12 +1112,8 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
ret = 0;
break;
case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
- CT_ERROR(ct, "Received GuC crash dump notification!\n");
- ret = 0;
- break;
case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
- CT_ERROR(ct, "Received GuC exception notification!\n");
- ret = 0;
+ ret = intel_guc_crash_process_msg(guc, action);
break;
default:
ret = -EOPNOTSUPP;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 364d0d546ec8..0f79cb658518 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -251,9 +251,11 @@ static int guc_wait_ucode(struct intel_guc *guc)
if (ret == 0)
ret = -ENXIO;
} else if (delta_ms > 200) {
- guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d]\n",
- delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps),
- before_freq, status, count, ret);
+ guc_warn(guc, "excessive init time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
+ delta_ms, status, count, ret);
+ guc_warn(guc, "excessive init time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n",
+ intel_rps_read_actual_frequency(&uncore->gt->rps), before_freq,
+ intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
} else {
guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps),
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
index 852bea0208ce..cc9569af7f0c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -94,7 +94,7 @@ static int guc_hwconfig_fill_buffer(struct intel_guc *guc, struct intel_hwconfig
static bool has_table(struct drm_i915_private *i915)
{
- if (IS_ALDERLAKE_P(i915) && !IS_ADLP_N(i915))
+ if (IS_ALDERLAKE_P(i915) && !IS_ALDERLAKE_P_N(i915))
return true;
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
return true;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index ee9f83af7cf6..2dfb07cc4b33 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -138,17 +138,6 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
return ret > 0 ? -EPROTO : ret;
}
-static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)
-{
- u32 request[] = {
- GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
- SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
- id,
- };
-
- return intel_guc_send(guc, request, ARRAY_SIZE(request));
-}
-
static bool slpc_is_running(struct intel_guc_slpc *slpc)
{
return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING;
@@ -199,15 +188,6 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)
return ret;
}
-static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id)
-{
- struct intel_guc *guc = slpc_to_guc(slpc);
-
- GEM_BUG_ON(id >= SLPC_MAX_PARAM);
-
- return guc_action_slpc_unset_param(guc, id);
-}
-
static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
{
struct intel_guc *guc = slpc_to_guc(slpc);
@@ -470,12 +450,19 @@ int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val)
ret = slpc_set_param(slpc,
SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
val);
- if (ret)
+ if (ret) {
guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient freq(%d): %pe\n",
val, ERR_PTR(ret));
- else
+ } else {
slpc->ignore_eff_freq = val;
+ /* Set min to RPn when we disable efficient freq */
+ if (val)
+ ret = slpc_set_param(slpc,
+ SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+ slpc->min_freq);
+ }
+
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
mutex_unlock(&slpc->lock);
return ret;
@@ -602,9 +589,8 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
return ret;
if (!slpc->min_freq_softlimit) {
- ret = intel_guc_slpc_get_min_freq(slpc, &slpc->min_freq_softlimit);
- if (unlikely(ret))
- return ret;
+ /* Min softlimit is initialized to RPn */
+ slpc->min_freq_softlimit = slpc->min_freq;
slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit;
} else {
return intel_guc_slpc_set_min_freq(slpc,
@@ -666,49 +652,6 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
slpc->boost_freq = slpc->rp0_freq;
}
-/**
- * intel_guc_slpc_override_gucrc_mode() - override GUCRC mode
- * @slpc: pointer to intel_guc_slpc.
- * @mode: new value of the mode.
- *
- * This function will override the GUCRC mode.
- *
- * Return: 0 on success, non-zero error code on failure.
- */
-int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode)
-{
- int ret;
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
- intel_wakeref_t wakeref;
-
- if (mode >= SLPC_GUCRC_MODE_MAX)
- return -EINVAL;
-
- with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode);
- if (ret)
- guc_err(slpc_to_guc(slpc), "Override RC mode %d failed: %pe\n",
- mode, ERR_PTR(ret));
- }
-
- return ret;
-}
-
-int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc)
-{
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
- intel_wakeref_t wakeref;
- int ret = 0;
-
- with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE);
- if (ret)
- guc_err(slpc_to_guc(slpc), "Unsetting RC mode failed: %pe\n", ERR_PTR(ret));
- }
-
- return ret;
-}
-
/*
* intel_guc_slpc_enable() - Start SLPC
* @slpc: pointer to intel_guc_slpc.
@@ -755,6 +698,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
return ret;
}
+ /* Set cached value of ignore efficient freq */
+ intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
+
/* Revert SLPC min/max to softlimits if necessary */
ret = slpc_set_softlimits(slpc);
if (unlikely(ret)) {
@@ -765,9 +711,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
/* Set cached media freq ratio mode */
intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
- /* Set cached value of ignore efficient freq */
- intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
-
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 597eb5413ddf..6ac6503c39d4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -44,8 +44,6 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val);
void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
-int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc);
-int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode);
int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index a0e3ef1c65d2..ae3495a9c814 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1433,6 +1433,36 @@ static void guc_timestamp_ping(struct work_struct *wrk)
int srcu, ret;
/*
+ * Ideally the busyness worker should take a gt pm wakeref because the
+ * worker only needs to be active while gt is awake. However, the
+ * gt_park path cancels the worker synchronously and this complicates
+ * the flow if the worker is also running at the same time. The cancel
+ * waits for the worker and when the worker releases the wakeref, that
+ * would call gt_park and would lead to a deadlock.
+ *
+ * The resolution is to take the global pm wakeref if runtime pm is
+ * already active. If not, we don't need to update the busyness stats as
+ * the stats would already be updated when the gt was parked.
+ *
+ * Note:
+ * - We do not requeue the worker if we cannot take a reference to runtime
+ * pm since intel_guc_busyness_unpark would requeue the worker in the
+ * resume path.
+ *
+ * - If the gt was parked longer than time taken for GT timestamp to roll
+ * over, we ignore those rollovers since we don't care about tracking
+ * the exact GT time. We only care about roll overs when the gt is
+ * active and running workloads.
+ *
+ * - There is a window of time between gt_park and runtime suspend,
+ * where the worker may run. This is acceptable since the worker will
+ * not find any new data to update busyness.
+ */
+ wakeref = intel_runtime_pm_get_if_active(&gt->i915->runtime_pm);
+ if (!wakeref)
+ return;
+
+ /*
* Synchronize with gt reset to make sure the worker does not
* corrupt the engine/guc stats. NB: can't actually block waiting
* for a reset to complete as the reset requires flushing out
@@ -1440,10 +1470,9 @@ static void guc_timestamp_ping(struct work_struct *wrk)
*/
ret = intel_gt_reset_trylock(gt, &srcu);
if (ret)
- return;
+ goto err_trylock;
- with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
- __update_guc_busyness_stats(guc);
+ __update_guc_busyness_stats(guc);
/* adjust context stats for overflow */
xa_for_each(&guc->context_lookup, index, ce)
@@ -1452,6 +1481,9 @@ static void guc_timestamp_ping(struct work_struct *wrk)
intel_gt_reset_unlock(gt, srcu);
guc_enable_busyness_worker(guc);
+
+err_trylock:
+ intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
}
static int guc_action_enable_usage_stats(struct intel_guc *guc)
@@ -1658,9 +1690,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
* Wa_22011802037: In addition to stopping the cs, we need
* to wait for any pending mi force wakeups
*/
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) {
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt)) {
intel_engine_stop_cs(engine);
intel_engine_wait_for_pending_mi_fw(engine);
}
@@ -4267,7 +4297,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
/* Wa_14014475959:dg2 */
if (engine->class == COMPUTE_CLASS)
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_DG2(engine->i915))
engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
@@ -5470,6 +5500,9 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+ BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
+ ve->base.mask = VIRTUAL_ENGINES;
+
intel_context_init(&ve->context, &ve->base);
for (n = 0; n < count; n++) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index ddd146265beb..ba9e07fc2b57 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -26,6 +26,7 @@
* The kernel driver is only responsible for loading the HuC firmware and
* triggering its security authentication. This is done differently depending
* on the platform:
+ *
* - older platforms (from Gen9 to most Gen12s): the load is performed via DMA
* and the authentication via GuC
* - DG2: load and authentication are both performed via GSC.
@@ -33,6 +34,7 @@
* not-DG2 older platforms), while the authentication is done in 2-steps,
* a first auth for clear-media workloads via GuC and a second one for all
* workloads via GSC.
+ *
* On platforms where the GuC does the authentication, to correctly do so the
* HuC binary must be loaded before the GuC one.
* Loading the HuC is optional; however, not using the HuC might negatively
@@ -265,7 +267,7 @@ static bool vcs_supported(struct intel_gt *gt)
GEM_BUG_ON(!gt_is_root(gt) && !gt->info.engine_mask);
if (gt_is_root(gt))
- mask = RUNTIME_INFO(gt->i915)->platform_engine_mask;
+ mask = INTEL_INFO(gt->i915)->platform_engine_mask;
else
mask = gt->info.engine_mask;
@@ -308,9 +310,9 @@ void intel_huc_init_early(struct intel_huc *huc)
huc->status[INTEL_HUC_AUTH_BY_GSC].mask = HUC_LOAD_SUCCESSFUL;
huc->status[INTEL_HUC_AUTH_BY_GSC].value = HUC_LOAD_SUCCESSFUL;
} else {
- huc->status[INTEL_HUC_AUTH_BY_GSC].reg = HECI_FWSTS5(MTL_GSC_HECI1_BASE);
- huc->status[INTEL_HUC_AUTH_BY_GSC].mask = HECI_FWSTS5_HUC_AUTH_DONE;
- huc->status[INTEL_HUC_AUTH_BY_GSC].value = HECI_FWSTS5_HUC_AUTH_DONE;
+ huc->status[INTEL_HUC_AUTH_BY_GSC].reg = HECI_FWSTS(MTL_GSC_HECI1_BASE, 5);
+ huc->status[INTEL_HUC_AUTH_BY_GSC].mask = HECI1_FWSTS5_HUC_AUTH_DONE;
+ huc->status[INTEL_HUC_AUTH_BY_GSC].value = HECI1_FWSTS5_HUC_AUTH_DONE;
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index e608152fecfc..b648238cc675 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -27,7 +27,6 @@ struct mtl_huc_auth_msg_out {
int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc)
{
struct intel_gt *gt = huc_to_gt(huc);
- struct drm_i915_private *i915 = gt->i915;
struct drm_i915_gem_object *obj;
struct mtl_huc_auth_msg_in *msg_in;
struct mtl_huc_auth_msg_out *msg_out;
@@ -43,7 +42,7 @@ int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc)
pkt_offset = i915_ggtt_offset(huc->heci_pkt);
pkt_vaddr = i915_gem_object_pin_map_unlocked(obj,
- i915_coherent_map_type(i915, obj, true));
+ intel_gt_coherent_map_type(gt, obj, true));
if (IS_ERR(pkt_vaddr))
return PTR_ERR(pkt_vaddr);
@@ -107,15 +106,6 @@ out_unpin:
return err;
}
-static void get_version_from_gsc_manifest(struct intel_uc_fw_ver *ver, const void *data)
-{
- const struct intel_gsc_manifest_header *manifest = data;
-
- ver->major = manifest->fw_version.major;
- ver->minor = manifest->fw_version.minor;
- ver->patch = manifest->fw_version.hotfix;
-}
-
static bool css_valid(const void *data, size_t size)
{
const struct uc_css_header *css = data;
@@ -227,8 +217,8 @@ int intel_huc_fw_get_binary_info(struct intel_uc_fw *huc_fw, const void *data, s
for (i = 0; i < header->num_of_entries; i++, entry++) {
if (strcmp(entry->name, "HUCP.man") == 0)
- get_version_from_gsc_manifest(&huc_fw->file_selected.ver,
- data + entry_offset(entry));
+ intel_uc_fw_version_from_gsc_manifest(&huc_fw->file_selected.ver,
+ data + entry_offset(entry));
if (strcmp(entry->name, "huc_fw") == 0) {
u32 offset = entry_offset(entry);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 18250fb64bd8..98b103375b7a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -43,7 +43,7 @@ static void uc_expand_default_options(struct intel_uc *uc)
}
/* Intermediate platforms are HuC authentication only */
- if (IS_ALDERLAKE_S(i915) && !IS_ADLS_RPLS(i915)) {
+ if (IS_ALDERLAKE_S(i915) && !IS_RAPTORLAKE_S(i915)) {
i915->params.enable_guc = ENABLE_GUC_LOAD_HUC;
return;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
index 2f93cc4e408a..6d541c866edb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
@@ -10,6 +10,7 @@
#include "gt/intel_gt_debugfs.h"
#include "intel_guc_debugfs.h"
+#include "intel_gsc_uc_debugfs.h"
#include "intel_huc_debugfs.h"
#include "intel_uc.h"
#include "intel_uc_debugfs.h"
@@ -58,6 +59,7 @@ void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root)
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), uc);
+ intel_gsc_uc_debugfs_register(&uc->gsc, root);
intel_guc_debugfs_register(&uc->guc, root);
intel_huc_debugfs_register(&uc->huc, root);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 944725e62414..32e27e9a2490 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -11,7 +11,10 @@
#include <drm/drm_print.h>
#include "gem/i915_gem_lmem.h"
+#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
+#include "intel_gsc_binary_headers.h"
+#include "intel_gsc_fw.h"
#include "intel_uc_fw.h"
#include "intel_uc_fw_abi.h"
#include "i915_drv.h"
@@ -129,6 +132,17 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(SKYLAKE, 0, huc_mmp(skl, 2, 0, 0))
/*
+ * The GSC FW has multiple version (see intel_gsc_uc.h for details); since what
+ * we care about is the interface, we use the compatibility version in the
+ * binary names.
+ * Same as with the GuC, a major version bump indicate a
+ * backward-incompatible change, while a minor version bump indicates a
+ * backward-compatible one, so we use only the former in the file name.
+ */
+#define INTEL_GSC_FIRMWARE_DEFS(fw_def, gsc_def) \
+ fw_def(METEORLAKE, 0, gsc_def(mtl, 1, 0))
+
+/*
* Set of macros for producing a list of filenames from the above table.
*/
#define __MAKE_UC_FW_PATH_BLANK(prefix_, name_) \
@@ -163,6 +177,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
#define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \
__MAKE_UC_FW_PATH_MMP(prefix_, "huc", major_, minor_, patch_)
+#define MAKE_GSC_FW_PATH(prefix_, major_, minor_) \
+ __MAKE_UC_FW_PATH_MAJOR(prefix_, "gsc", major_)
+
/*
* All blobs need to be declared via MODULE_FIRMWARE().
* This first expansion of the table macros is solely to provide
@@ -173,6 +190,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP)
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC)
+INTEL_GSC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GSC_FW_PATH)
/*
* The next expansion of the table macros (in __uc_fw_auto_select below) provides
@@ -222,6 +240,10 @@ struct __packed uc_fw_blob {
#define HUC_FW_BLOB_GSC(prefix_) \
UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_))
+#define GSC_FW_BLOB(prefix_, major_, minor_) \
+ UC_FW_BLOB_NEW(major_, minor_, 0, true, \
+ MAKE_GSC_FW_PATH(prefix_, major_, minor_))
+
struct __packed uc_fw_platform_requirement {
enum intel_platform p;
u8 rev; /* first platform rev using this FW */
@@ -248,9 +270,14 @@ static const struct uc_fw_platform_requirement blobs_huc[] = {
INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC)
};
+static const struct uc_fw_platform_requirement blobs_gsc[] = {
+ INTEL_GSC_FIRMWARE_DEFS(MAKE_FW_LIST, GSC_FW_BLOB)
+};
+
static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
[INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
+ [INTEL_UC_FW_TYPE_GSC] = { blobs_gsc, ARRAY_SIZE(blobs_gsc) },
};
static void
@@ -264,20 +291,12 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
bool found;
/*
- * GSC FW support is still not fully in place, so we're not defining
- * the FW blob yet because we don't want the driver to attempt to load
- * it until we're ready for it.
- */
- if (uc_fw->type == INTEL_UC_FW_TYPE_GSC)
- return;
-
- /*
* The only difference between the ADL GuC FWs is the HWConfig support.
* ADL-N does not support HWConfig, so we should use the same binary as
* ADL-S, otherwise the GuC might attempt to fetch a config table that
* does not exist.
*/
- if (IS_ADLP_N(i915))
+ if (IS_ALDERLAKE_P_N(i915))
p = INTEL_ALDERLAKE_S;
GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
@@ -468,6 +487,17 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc
}
}
+void intel_uc_fw_version_from_gsc_manifest(struct intel_uc_fw_ver *ver,
+ const void *data)
+{
+ const struct intel_gsc_manifest_header *manifest = data;
+
+ ver->major = manifest->fw_version.major;
+ ver->minor = manifest->fw_version.minor;
+ ver->patch = manifest->fw_version.hotfix;
+ ver->build = manifest->fw_version.build;
+}
+
/**
* intel_uc_fw_init_early - initialize the uC object and select the firmware
* @uc_fw: uC firmware
@@ -668,13 +698,18 @@ static int check_gsc_manifest(struct intel_gt *gt,
const struct firmware *fw,
struct intel_uc_fw *uc_fw)
{
- if (uc_fw->type != INTEL_UC_FW_TYPE_HUC) {
- gt_err(gt, "trying to GSC-parse a non-HuC binary");
+ switch (uc_fw->type) {
+ case INTEL_UC_FW_TYPE_HUC:
+ intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ break;
+ case INTEL_UC_FW_TYPE_GSC:
+ intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ break;
+ default:
+ MISSING_CASE(uc_fw->type);
return -EINVAL;
}
- intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size);
-
if (uc_fw->dma_start_offset) {
u32 delta = uc_fw->dma_start_offset;
@@ -734,10 +769,6 @@ static int check_fw_header(struct intel_gt *gt,
{
int err = 0;
- /* GSC FW version is queried after the FW is loaded */
- if (uc_fw->type == INTEL_UC_FW_TYPE_GSC)
- return 0;
-
if (uc_fw->has_gsc_headers)
err = check_gsc_manifest(gt, fw, uc_fw);
else
@@ -773,6 +804,80 @@ static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware **
return 0;
}
+static int check_mtl_huc_guc_compatibility(struct intel_gt *gt,
+ struct intel_uc_fw_file *huc_selected)
+{
+ struct intel_uc_fw_file *guc_selected = &gt->uc.guc.fw.file_selected;
+ struct intel_uc_fw_ver *huc_ver = &huc_selected->ver;
+ struct intel_uc_fw_ver *guc_ver = &guc_selected->ver;
+ bool new_huc, new_guc;
+
+ /* we can only do this check after having fetched both GuC and HuC */
+ GEM_BUG_ON(!huc_selected->path || !guc_selected->path);
+
+ /*
+ * Due to changes in the authentication flow for MTL, HuC 8.5.1 or newer
+ * requires GuC 70.7.0 or newer. Older HuC binaries will instead require
+ * GuC < 70.7.0.
+ */
+ new_huc = huc_ver->major > 8 ||
+ (huc_ver->major == 8 && huc_ver->minor > 5) ||
+ (huc_ver->major == 8 && huc_ver->minor == 5 && huc_ver->patch >= 1);
+
+ new_guc = guc_ver->major > 70 ||
+ (guc_ver->major == 70 && guc_ver->minor >= 7);
+
+ if (new_huc != new_guc) {
+ UNEXPECTED(gt, "HuC %u.%u.%u is incompatible with GuC %u.%u.%u\n",
+ huc_ver->major, huc_ver->minor, huc_ver->patch,
+ guc_ver->major, guc_ver->minor, guc_ver->patch);
+ gt_info(gt, "MTL GuC 70.7.0+ and HuC 8.5.1+ don't work with older releases\n");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+int intel_uc_check_file_version(struct intel_uc_fw *uc_fw, bool *old_ver)
+{
+ struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
+ struct intel_uc_fw_file *wanted = &uc_fw->file_wanted;
+ struct intel_uc_fw_file *selected = &uc_fw->file_selected;
+ int ret;
+
+ /*
+ * MTL has some compatibility issues with early GuC/HuC binaries
+ * not working with newer ones. This is specific to MTL and we
+ * don't expect it to extend to other platforms.
+ */
+ if (IS_METEORLAKE(gt->i915) && uc_fw->type == INTEL_UC_FW_TYPE_HUC) {
+ ret = check_mtl_huc_guc_compatibility(gt, selected);
+ if (ret)
+ return ret;
+ }
+
+ if (!wanted->ver.major || !selected->ver.major)
+ return 0;
+
+ /* Check the file's major version was as it claimed */
+ if (selected->ver.major != wanted->ver.major) {
+ UNEXPECTED(gt, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+ intel_uc_fw_type_repr(uc_fw->type), selected->path,
+ selected->ver.major, selected->ver.minor,
+ wanted->ver.major, wanted->ver.minor);
+ if (!intel_uc_fw_is_overridden(uc_fw))
+ return -ENOEXEC;
+ } else if (old_ver) {
+ if (selected->ver.minor < wanted->ver.minor)
+ *old_ver = true;
+ else if ((selected->ver.minor == wanted->ver.minor) &&
+ (selected->ver.patch < wanted->ver.patch))
+ *old_ver = true;
+ }
+
+ return 0;
+}
+
/**
* intel_uc_fw_fetch - fetch uC firmware
* @uc_fw: uC firmware
@@ -840,25 +945,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
goto fail;
}
- if (uc_fw->file_wanted.ver.major && uc_fw->file_selected.ver.major) {
- /* Check the file's major version was as it claimed */
- if (uc_fw->file_selected.ver.major != uc_fw->file_wanted.ver.major) {
- UNEXPECTED(gt, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path,
- uc_fw->file_selected.ver.major, uc_fw->file_selected.ver.minor,
- uc_fw->file_wanted.ver.major, uc_fw->file_wanted.ver.minor);
- if (!intel_uc_fw_is_overridden(uc_fw)) {
- err = -ENOEXEC;
- goto fail;
- }
- } else {
- if (uc_fw->file_selected.ver.minor < uc_fw->file_wanted.ver.minor)
- old_ver = true;
- else if ((uc_fw->file_selected.ver.minor == uc_fw->file_wanted.ver.minor) &&
- (uc_fw->file_selected.ver.patch < uc_fw->file_wanted.ver.patch))
- old_ver = true;
- }
- }
+ err = intel_uc_check_file_version(uc_fw, &old_ver);
+ if (err)
+ goto fail;
if (old_ver && uc_fw->file_selected.ver.major) {
/* Preserve the version that was really wanted */
@@ -1125,7 +1214,7 @@ static int uc_fw_rsa_data_create(struct intel_uc_fw *uc_fw)
return PTR_ERR(vma);
vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
- i915_coherent_map_type(gt->i915, vma->obj, true));
+ intel_gt_coherent_map_type(gt, vma->obj, true));
if (IS_ERR(vaddr)) {
i915_vma_unpin_and_release(&vma, 0);
err = PTR_ERR(vaddr);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 054f02811971..9a431726c8d5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -70,6 +70,7 @@ struct intel_uc_fw_ver {
u32 major;
u32 minor;
u32 patch;
+ u32 build;
};
/*
@@ -289,6 +290,9 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
return __intel_uc_fw_get_upload_size(uc_fw);
}
+void intel_uc_fw_version_from_gsc_manifest(struct intel_uc_fw_ver *ver,
+ const void *data);
+int intel_uc_check_file_version(struct intel_uc_fw *uc_fw, bool *old_ver);
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
enum intel_uc_fw_type type,
bool needs_ggtt_mapping);
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index 1fd760539f77..bfb72143566f 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -204,9 +204,9 @@ static int intel_guc_steal_guc_ids(void *arg)
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
rq = NULL;
- if (ret != -EAGAIN) {
- guc_err(guc, "Failed to create request %d: %pe\n",
- context_index, ERR_PTR(ret));
+ if ((ret != -EAGAIN) || !last) {
+ guc_err(guc, "Failed to create %srequest %d: %pe\n",
+ last ? "" : "first ", context_index, ERR_PTR(ret));
goto err_spin_rq;
}
} else {