diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
74 files changed, 1858 insertions, 1022 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c index 1c82caf525c3..8fe0499308ff 100644 --- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c @@ -76,7 +76,7 @@ int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode) cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) { cmd |= MI_EXE_FLUSH; - if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5) + if (IS_G4X(rq->i915) || GRAPHICS_VER(rq->i915) == 5) cmd |= MI_INVALIDATE_ISP; } diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 23857cc08eca..ba4c2422b340 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -4,9 +4,9 @@ */ #include "gen8_engine_cs.h" -#include "i915_drv.h" #include "intel_engine_regs.h" #include "intel_gpu_commands.h" +#include "intel_gt.h" #include "intel_lrc.h" #include "intel_ring.h" @@ -39,11 +39,11 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL * pipe control. */ - if (GRAPHICS_VER(rq->engine->i915) == 9) + if (GRAPHICS_VER(rq->i915) == 9) vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0)) + if (IS_KABYLAKE(rq->i915) && IS_GRAPHICS_STEP(rq->i915, 0, STEP_C0)) dc_flush_wa = true; } @@ -165,14 +165,60 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg) +static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine) { - u32 gsi_offset = gt->uncore->gsi_offset; + switch (engine->id) { + case RCS0: + return GEN12_CCS_AUX_INV; + case BCS0: + return GEN12_BCS0_AUX_INV; + case VCS0: + return GEN12_VD0_AUX_INV; + case VCS2: + return GEN12_VD2_AUX_INV; + case VECS0: + return GEN12_VE0_AUX_INV; + case CCS0: + return GEN12_CCS0_AUX_INV; + default: + return INVALID_MMIO_REG; + } +} + +static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine) +{ + i915_reg_t reg = gen12_get_aux_inv_reg(engine); + + if (IS_PONTEVECCHIO(engine->i915)) + return false; + + /* + * So far platforms supported by i915 having flat ccs do not require + * AUX invalidation. Check also whether the engine requires it. + */ + return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915); +} + +u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) +{ + i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine); + u32 gsi_offset = engine->gt->uncore->gsi_offset; + + if (!gen12_needs_ccs_aux_inv(engine)) + return cs; *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; *cs++ = AUX_INV; - *cs++ = MI_NOOP; + + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | + MI_SEMAPHORE_REGISTER_POLL | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; + *cs++ = 0; + *cs++ = 0; return cs; } @@ -180,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv static int mtl_dummy_pipe_control(struct i915_request *rq) { /* Wa_14016712196 */ - if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) || + IS_DG2(rq->i915)) { u32 *cs; /* dummy PIPE_CONTROL + depth flush */ @@ -202,8 +248,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) { struct intel_engine_cs *engine = rq->engine; - if (mode & EMIT_FLUSH) { - u32 flags = 0; + /* + * On Aux CCS platforms the invalidation of the Aux + * table requires quiescing memory traffic beforehand + */ + if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) { + u32 bit_group_0 = 0; + u32 bit_group_1 = 0; int err; u32 *cs; @@ -211,32 +262,49 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) if (err) return err; - flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; - flags |= PIPE_CONTROL_FLUSH_L3; - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH; + + /* + * When required, in MTL and beyond platforms we + * need to set the CCS_FLUSH bit in the pipe control + */ + if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70)) + bit_group_0 |= PIPE_CONTROL_CCS_FLUSH; + + /* + * L3 fabric flush is needed for AUX CCS invalidation + * which happens as part of pipe-control so we can + * ignore PIPE_CONTROL_FLUSH_L3. Also PIPE_CONTROL_FLUSH_L3 + * deals with Protected Memory which is not needed for + * AUX CCS invalidation and lead to unwanted side effects. + */ + if (mode & EMIT_FLUSH) + bit_group_1 |= PIPE_CONTROL_FLUSH_L3; + + bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; + bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* Wa_1409600907:tgl,adl-p */ - flags |= PIPE_CONTROL_DEPTH_STALL; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; + bit_group_1 |= PIPE_CONTROL_DEPTH_STALL; + bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE; + bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE; - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - flags |= PIPE_CONTROL_QW_WRITE; + bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX; + bit_group_1 |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_CS_STALL; + bit_group_1 |= PIPE_CONTROL_CS_STALL; if (!HAS_3D_PIPELINE(engine->i915)) - flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS; else if (engine->class == COMPUTE_CLASS) - flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen12_emit_pipe_control(cs, - PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - flags, LRC_PPHWSP_SCRATCH_ADDR); + cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1, + LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(rq, cs); } @@ -267,10 +335,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) else if (engine->class == COMPUTE_CLASS) flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; - if (!HAS_FLAT_CCS(rq->engine->i915)) - count = 8 + 4; - else - count = 8; + count = 8; + if (gen12_needs_ccs_aux_inv(rq->engine)) + count += 8; cs = intel_ring_begin(rq, count); if (IS_ERR(cs)) @@ -285,11 +352,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); - if (!HAS_FLAT_CCS(rq->engine->i915)) { - /* hsdes: 1809175790 */ - cs = gen12_emit_aux_table_inv(rq->engine->gt, - cs, GEN12_GFX_CCS_AUX_NV); - } + cs = gen12_emit_aux_table_inv(engine, cs); *cs++ = preparser_disable(false); intel_ring_advance(rq, cs); @@ -300,21 +363,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) { - intel_engine_mask_t aux_inv = 0; - u32 cmd, *cs; + u32 cmd = 4; + u32 *cs; - cmd = 4; if (mode & EMIT_INVALIDATE) { cmd += 2; - if (!HAS_FLAT_CCS(rq->engine->i915) && - (rq->engine->class == VIDEO_DECODE_CLASS || - rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) { - aux_inv = rq->engine->mask & - ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0); - if (aux_inv) - cmd += 4; - } + if (gen12_needs_ccs_aux_inv(rq->engine)) + cmd += 8; } cs = intel_ring_begin(rq, cmd); @@ -338,6 +394,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) cmd |= MI_INVALIDATE_TLB; if (rq->engine->class == VIDEO_DECODE_CLASS) cmd |= MI_INVALIDATE_BSD; + + if (gen12_needs_ccs_aux_inv(rq->engine) && + rq->engine->class == COPY_ENGINE_CLASS) + cmd |= MI_FLUSH_DW_CCS; } *cs++ = cmd; @@ -345,14 +405,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) *cs++ = 0; /* upper addr */ *cs++ = 0; /* value */ - if (aux_inv) { /* hsdes: 1809175790 */ - if (rq->engine->class == VIDEO_DECODE_CLASS) - cs = gen12_emit_aux_table_inv(rq->engine->gt, - cs, GEN12_VD0_AUX_NV); - else - cs = gen12_emit_aux_table_inv(rq->engine->gt, - cs, GEN12_VE0_AUX_NV); - } + cs = gen12_emit_aux_table_inv(rq->engine, cs); if (mode & EMIT_INVALIDATE) *cs++ = preparser_disable(false); @@ -754,7 +807,8 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { - struct drm_i915_private *i915 = rq->engine->i915; + struct drm_i915_private *i915 = rq->i915; + struct intel_gt *gt = rq->engine->gt; u32 flags = (PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_TILE_CACHE_FLUSH | @@ -765,8 +819,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE); /* Wa_14016712196 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) /* dummy PIPE_CONTROL + depth flush */ cs = gen12_emit_pipe_control(cs, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); @@ -775,7 +828,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) /* Wa_1409600907 */ flags |= PIPE_CONTROL_DEPTH_STALL; - if (!HAS_3D_PIPELINE(rq->engine->i915)) + if (!HAS_3D_PIPELINE(rq->i915)) flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; else if (rq->engine->class == COMPUTE_CLASS) flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h index 655e5c00ddc2..867ba697aceb 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h @@ -13,6 +13,7 @@ #include "intel_gt_regs.h" #include "intel_gpu_commands.h" +struct intel_engine_cs; struct intel_gt; struct i915_request; @@ -46,28 +47,32 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg); +u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs); static inline u32 * -__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) +__gen8_emit_pipe_control(u32 *batch, u32 bit_group_0, + u32 bit_group_1, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); - batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0; - batch[1] = flags1; + batch[0] = GFX_OP_PIPE_CONTROL(6) | bit_group_0; + batch[1] = bit_group_1; batch[2] = offset; return batch + 6; } -static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +static inline u32 *gen8_emit_pipe_control(u32 *batch, + u32 bit_group_1, u32 offset) { - return __gen8_emit_pipe_control(batch, 0, flags, offset); + return __gen8_emit_pipe_control(batch, 0, bit_group_1, offset); } -static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) +static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 bit_group_0, + u32 bit_group_1, u32 offset) { - return __gen8_emit_pipe_control(batch, flags0, flags1, offset); + return __gen8_emit_pipe_control(batch, bit_group_0, + bit_group_1, offset); } static inline u32 * diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index c8568e5d1147..9895e18df043 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -242,9 +242,9 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); len = gen8_pd_range(start, end, lvl--, &idx); - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", - __func__, vm, lvl + 1, start, end, - idx, len, atomic_read(px_used(pd))); + GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", + __func__, vm, lvl + 1, start, end, + idx, len, atomic_read(px_used(pd))); GEM_BUG_ON(!len || len >= atomic_read(px_used(pd))); do { @@ -252,8 +252,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) && gen8_pd_contains(start, end, lvl)) { - DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", - __func__, vm, lvl + 1, idx, start, end); + GTT_TRACE("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", + __func__, vm, lvl + 1, idx, start, end); clear_pd_entry(pd, idx, scratch); __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); start += (u64)I915_PDES << gen8_pd_shift(lvl); @@ -270,10 +270,10 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, u64 *vaddr; count = gen8_pt_count(start, end); - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n", - __func__, vm, lvl, start, end, - gen8_pd_index(start, 0), count, - atomic_read(&pt->used)); + GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n", + __func__, vm, lvl, start, end, + gen8_pd_index(start, 0), count, + atomic_read(&pt->used)); GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); num_ptes = count; @@ -325,9 +325,9 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); len = gen8_pd_range(*start, end, lvl--, &idx); - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", - __func__, vm, lvl + 1, *start, end, - idx, len, atomic_read(px_used(pd))); + GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", + __func__, vm, lvl + 1, *start, end, + idx, len, atomic_read(px_used(pd))); GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1)); spin_lock(&pd->lock); @@ -338,8 +338,8 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, if (!pt) { spin_unlock(&pd->lock); - DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", - __func__, vm, lvl + 1, idx); + GTT_TRACE("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", + __func__, vm, lvl + 1, idx); pt = stash->pt[!!lvl]; __i915_gem_object_pin_pages(pt->base); @@ -369,10 +369,10 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, } else { unsigned int count = gen8_pt_count(*start, end); - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n", - __func__, vm, lvl, *start, end, - gen8_pd_index(*start, 0), count, - atomic_read(&pt->used)); + GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n", + __func__, vm, lvl, *start, end, + gen8_pd_index(*start, 0), count, + atomic_read(&pt->used)); atomic_add(count, &pt->used); /* All other pdes may be simultaneously removed */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0aff5bb13c53..84a75c95f3f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -558,7 +558,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, DRIVER_CAPS(i915)->has_logical_contexts = true; ewma__engine_latency_init(&engine->latency); - seqcount_init(&engine->stats.execlists.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -1333,6 +1332,7 @@ static int measure_breadcrumb_dw(struct intel_context *ce) if (!frame) return -ENOMEM; + frame->rq.i915 = engine->i915; frame->rq.engine = engine; frame->rq.context = ce; rcu_assign_pointer(frame->rq.timeline, ce->timeline); @@ -1616,9 +1616,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, * Wa_22011802037: Prior to doing a reset, ensure CS is * stopped, set ring stop bit and prefetch disable bit to halt CS */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 21af0ec52223..e91fc881dbf1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - if (IS_METEORLAKE(i915) && engine->id == GSC0) { + if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) { intel_uncore_write(engine->gt->uncore, RC_PSMI_CTRL_GSCCS, _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); @@ -39,7 +39,7 @@ static void dbg_poison_ce(struct intel_context *ce) if (ce->state) { struct drm_i915_gem_object *obj = ce->state->obj; - int type = i915_coherent_map_type(ce->engine->i915, obj, true); + int type = intel_gt_coherent_map_type(ce->engine->gt, obj, true); void *map; if (!i915_gem_object_trylock(obj, NULL)) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 6b9d9f837669..fdd4ddd3a978 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -177,6 +177,7 @@ #define CTX_CTRL_RS_CTX_ENABLE REG_BIT(1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT REG_BIT(2) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) +#define GEN12_CTX_CTRL_RUNALONE_MODE REG_BIT(7) #define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE REG_BIT(8) #define RING_CTX_SR_CTL(base) _MMIO((base) + 0x244) #define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e99a6fa03d45..a7e677598004 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -58,6 +58,7 @@ struct i915_perf_group; typedef u32 intel_engine_mask_t; #define ALL_ENGINES ((intel_engine_mask_t)~0ul) +#define VIRTUAL_ENGINES BIT(BITS_PER_TYPE(intel_engine_mask_t) - 1) struct intel_hw_status_page { struct list_head timelines; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 2ebd937f3b4c..e8f42ec6b1b4 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2718,7 +2718,7 @@ static int emit_pdps(struct i915_request *rq) int err, i; u32 *cs; - GEM_BUG_ON(intel_vgpu_active(rq->engine->i915)); + GEM_BUG_ON(intel_vgpu_active(rq->i915)); /* * Beware ye of the dragons, this sequence is magic! @@ -3001,9 +3001,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) intel_engine_wait_for_pending_mi_fw(engine); engine->execlists.reset_ccid = active_ccid(engine); @@ -3550,22 +3548,24 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); + seqcount_init(&engine->stats.execlists.lock); + if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) rcs_submission_override(engine); lrc_init_wa_ctx(engine); if (HAS_LOGICAL_RING_ELSQ(i915)) { - execlists->submit_reg = uncore->regs + + execlists->submit_reg = intel_uncore_regs(uncore) + i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); - execlists->ctrl_reg = uncore->regs + + execlists->ctrl_reg = intel_uncore_regs(uncore) + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); engine->fw_domain = intel_uncore_forcewake_for_reg(engine->uncore, RING_EXECLIST_CONTROL(engine->mmio_base), FW_REG_WRITE); } else { - execlists->submit_reg = uncore->regs + + execlists->submit_reg = intel_uncore_regs(uncore) + i915_mmio_reg_offset(RING_ELSP(base)); } diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index dd0ed941441a..da21f2786b5d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm, vm->clear_range(vm, vma_res->start, vma_res->vma_size); } +/* + * Reserve the top of the GuC address space for firmware images. Addresses + * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC, + * which makes for a suitable range to hold GuC/HuC firmware images if the + * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT + * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk + * of the same size anyway, which is far more than needed, to keep the logic + * in uc_fw_ggtt_offset() simple. + */ +#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP) + static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) { - u64 size; + u64 offset; int ret; if (!intel_uc_uses_guc(&ggtt->vm.gt->uc)) return 0; - GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); - size = ggtt->vm.total - GUC_GGTT_TOP; + GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE); + offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE; - ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size, - GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, - PIN_NOEVICT); + ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, + GUC_TOP_RESERVE_SIZE, offset, + I915_COLOR_UNEVICTABLE, PIN_NOEVICT); if (ret) drm_dbg(&ggtt->vm.i915->drm, "Failed to reserve top of GGTT for GuC\n"); diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index 37d0b0fe791d..40371b8a9bbb 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -818,7 +818,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, if (obj->bit_17 == NULL) { obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL); if (obj->bit_17 == NULL) { - drm_err(&to_i915(obj->base.dev)->drm, + drm_err(obj->base.dev, "Failed to allocate memory for bit 17 record\n"); return; } diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 5d143e2a8db0..2bd8d98d2110 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -121,6 +121,7 @@ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ #define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */ +#define MI_SEMAPHORE_REGISTER_POLL (1 << 16) #define MI_SEMAPHORE_POLL (1 << 15) #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) @@ -299,6 +300,7 @@ #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) +#define PIPE_CONTROL_CCS_FLUSH (1<<13) /* MTL+ */ #define PIPE_CONTROL_WRITE_FLUSH (1<<12) #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 7a008e829d4d..93062c35e072 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -33,6 +33,7 @@ #include "intel_rps.h" #include "intel_sa_media.h" #include "intel_gt_sysfs.h" +#include "intel_tlb.h" #include "intel_uncore.h" #include "shmem_utils.h" @@ -50,8 +51,7 @@ void intel_gt_common_init_early(struct intel_gt *gt) intel_gt_init_reset(gt); intel_gt_init_requests(gt); intel_gt_init_timelines(gt); - mutex_init(>->tlb.invalidate_lock); - seqcount_mutex_init(>->tlb.seqno, >->tlb.invalidate_lock); + intel_gt_init_tlb(gt); intel_gt_pm_init_early(gt); intel_wopcm_init_early(>->wopcm); @@ -179,7 +179,7 @@ int intel_gt_init_hw(struct intel_gt *gt) if (IS_HASWELL(i915)) intel_uncore_write(uncore, HSW_MI_PREDICATE_RESULT_2, - IS_HSW_GT3(i915) ? + IS_HASWELL_GT3(i915) ? LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); /* Apply the GT workarounds... */ @@ -466,7 +466,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE | I915_BO_ALLOC_GPU_ONLY); - if (IS_ERR(obj)) + if (IS_ERR(obj) && !IS_METEORLAKE(i915)) /* Wa_22018444074 */ obj = i915_gem_object_create_stolen(i915, size); if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); @@ -846,7 +846,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915) intel_gt_fini_requests(gt); intel_gt_fini_reset(gt); intel_gt_fini_timelines(gt); - mutex_destroy(>->tlb.invalidate_lock); + intel_gt_fini_tlb(gt); intel_engines_free(gt); } } @@ -887,7 +887,7 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) int intel_gt_probe_all(struct drm_i915_private *i915) { struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - struct intel_gt *gt = &i915->gt0; + struct intel_gt *gt = to_gt(i915); const struct intel_gt_definition *gtdef; phys_addr_t phys_addr; unsigned int mmio_bar; @@ -904,7 +904,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915) */ gt->i915 = i915; gt->name = "Primary GT"; - gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; + gt->info.engine_mask = INTEL_INFO(i915)->platform_engine_mask; gt_dbg(gt, "Setting up %s\n", gt->name); ret = intel_gt_tile_setup(gt, phys_addr); @@ -1004,136 +1004,23 @@ void intel_gt_info_print(const struct intel_gt_info *info, intel_sseu_dump(&info->sseu, p); } -/* - * HW architecture suggest typical invalidation time at 40us, - * with pessimistic cases up to 100us and a recommendation to - * cap at 1ms. We go a bit higher just in case. - */ -#define TLB_INVAL_TIMEOUT_US 100 -#define TLB_INVAL_TIMEOUT_MS 4 - -/* - * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets - * but are now considered MCR registers. Since they exist within a GAM range, - * the primary instance of the register rolls up the status from each unit. - */ -static int wait_for_invalidate(struct intel_engine_cs *engine) -{ - if (engine->tlb_inv.mcr) - return intel_gt_mcr_wait_for_reg(engine->gt, - engine->tlb_inv.reg.mcr_reg, - engine->tlb_inv.done, - 0, - TLB_INVAL_TIMEOUT_US, - TLB_INVAL_TIMEOUT_MS); - else - return __intel_wait_for_register_fw(engine->gt->uncore, - engine->tlb_inv.reg.reg, - engine->tlb_inv.done, - 0, - TLB_INVAL_TIMEOUT_US, - TLB_INVAL_TIMEOUT_MS, - NULL); -} - -static void mmio_invalidate_full(struct intel_gt *gt) +enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, + struct drm_i915_gem_object *obj, + bool always_coherent) { - struct drm_i915_private *i915 = gt->i915; - struct intel_uncore *uncore = gt->uncore; - struct intel_engine_cs *engine; - intel_engine_mask_t awake, tmp; - enum intel_engine_id id; - unsigned long flags; - - if (GRAPHICS_VER(i915) < 8) - return; - - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - - intel_gt_mcr_lock(gt, &flags); - spin_lock(&uncore->lock); /* serialise invalidate with GT reset */ - - awake = 0; - for_each_engine(engine, gt, id) { - if (!intel_engine_pm_is_awake(engine)) - continue; - - if (engine->tlb_inv.mcr) - intel_gt_mcr_multicast_write_fw(gt, - engine->tlb_inv.reg.mcr_reg, - engine->tlb_inv.request); - else - intel_uncore_write_fw(uncore, - engine->tlb_inv.reg.reg, - engine->tlb_inv.request); - - awake |= engine->mask; - } - - GT_TRACE(gt, "invalidated engines %08x\n", awake); - - /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */ - if (awake && - (IS_TIGERLAKE(i915) || - IS_DG1(i915) || - IS_ROCKETLAKE(i915) || - IS_ALDERLAKE_S(i915) || - IS_ALDERLAKE_P(i915))) - intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1); - - spin_unlock(&uncore->lock); - intel_gt_mcr_unlock(gt, flags); - - for_each_engine_masked(engine, gt, awake, tmp) { - if (wait_for_invalidate(engine)) - gt_err_ratelimited(gt, - "%s TLB invalidation did not complete in %ums!\n", - engine->name, TLB_INVAL_TIMEOUT_MS); - } - /* - * Use delayed put since a) we mostly expect a flurry of TLB - * invalidations so it is good to avoid paying the forcewake cost and - * b) it works around a bug in Icelake which cannot cope with too rapid - * transitions. + * Wa_22016122933: always return I915_MAP_WC for Media + * version 13.0 when the object is on the Media GT */ - intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); + if (i915_gem_object_is_lmem(obj) || intel_gt_needs_wa_22016122933(gt)) + return I915_MAP_WC; + if (HAS_LLC(gt->i915) || always_coherent) + return I915_MAP_WB; + else + return I915_MAP_WC; } -static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno) +bool intel_gt_needs_wa_22016122933(struct intel_gt *gt) { - u32 cur = intel_gt_tlb_seqno(gt); - - /* Only skip if a *full* TLB invalidate barrier has passed */ - return (s32)(cur - ALIGN(seqno, 2)) > 0; + return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA; } - -void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno) -{ - intel_wakeref_t wakeref; - - if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) - return; - - if (intel_gt_is_wedged(gt)) - return; - - if (tlb_seqno_passed(gt, seqno)) - return; - - with_intel_gt_pm_if_awake(gt, wakeref) { - mutex_lock(>->tlb.invalidate_lock); - if (tlb_seqno_passed(gt, seqno)) - goto unlock; - - mmio_invalidate_full(gt); - - write_seqcount_invalidate(>->tlb.seqno); -unlock: - mutex_unlock(>->tlb.invalidate_lock); - } -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftest_tlb.c" -#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index d2f4fbde5f9f..2cac499d5aa3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -13,6 +13,69 @@ struct drm_i915_private; struct drm_printer; +/* + * Check that the GT is a graphics GT and has an IP version within the + * specified range (inclusive). + */ +#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \ + BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \ + BUILD_BUG_ON_ZERO((until) < (from)) + \ + ((gt)->type != GT_MEDIA && \ + GRAPHICS_VER_FULL((gt)->i915) >= (from) && \ + GRAPHICS_VER_FULL((gt)->i915) <= (until))) + +/* + * Check that the GT is a media GT and has an IP version within the + * specified range (inclusive). + * + * Only usable on platforms with a standalone media design (i.e., IP version 13 + * and higher). + */ +#define IS_MEDIA_GT_IP_RANGE(gt, from, until) ( \ + BUILD_BUG_ON_ZERO((from) < IP_VER(13, 0)) + \ + BUILD_BUG_ON_ZERO((until) < (from)) + \ + ((gt) && (gt)->type == GT_MEDIA && \ + MEDIA_VER_FULL((gt)->i915) >= (from) && \ + MEDIA_VER_FULL((gt)->i915) <= (until))) + +/* + * Check that the GT is a graphics GT with a specific IP version and has + * a stepping in the range [from, until). The lower stepping bound is + * inclusive, the upper bound is exclusive. The most common use-case of this + * macro is for checking bounds for workarounds, which usually have a stepping + * ("from") at which the hardware issue is first present and another stepping + * ("until") at which a hardware fix is present and the software workaround is + * no longer necessary. E.g., + * + * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) + * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER) + * + * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper + * stepping bound for the specified IP version. + */ +#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \ + BUILD_BUG_ON_ZERO((until) <= (from)) + \ + (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \ + IS_GRAPHICS_STEP((gt)->i915, (from), (until)))) + +/* + * Check that the GT is a media GT with a specific IP version and has + * a stepping in the range [from, until). The lower stepping bound is + * inclusive, the upper bound is exclusive. The most common use-case of this + * macro is for checking bounds for workarounds, which usually have a stepping + * ("from") at which the hardware issue is first present and another stepping + * ("until") at which a hardware fix is present and the software workaround is + * no longer necessary. "STEP_FOREVER" can be passed as "until" for + * workarounds that have no upper stepping bound for the specified IP version. + * + * This macro may only be used to match on platforms that have a standalone + * media design (i.e., media version 13 or higher). + */ +#define IS_MEDIA_GT_IP_STEP(gt, ipver, from, until) ( \ + BUILD_BUG_ON_ZERO((until) <= (from)) + \ + (IS_MEDIA_GT_IP_RANGE((gt), (ipver), (ipver)) && \ + IS_MEDIA_STEP((gt)->i915, (from), (until)))) + #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ @@ -24,6 +87,8 @@ static inline bool gt_is_root(struct intel_gt *gt) return !gt->info.id; } +bool intel_gt_needs_wa_22016122933(struct intel_gt *gt); + static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) { return container_of(uc, struct intel_gt, uc); @@ -107,16 +172,8 @@ void intel_gt_info_print(const struct intel_gt_info *info, void intel_gt_watchdog_work(struct work_struct *work); -static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt) -{ - return seqprop_sequence(>->tlb.seqno); -} - -static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt) -{ - return intel_gt_tlb_seqno(gt) | 1; -} - -void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno); +enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, + struct drm_i915_gem_object *obj, + bool always_coherent); #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_defines.h b/drivers/gpu/drm/i915/gt/intel_gt_defines.h new file mode 100644 index 000000000000..5017788bac8f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_defines.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_GT_DEFINES__ +#define __INTEL_GT_DEFINES__ + +#define I915_MAX_GT 2 + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 62fd00c9e519..77fb57223465 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -31,7 +31,7 @@ static u32 gen11_gt_engine_identity(struct intel_gt *gt, const unsigned int bank, const unsigned int bit) { - void __iomem * const regs = gt->uncore->regs; + void __iomem * const regs = intel_uncore_regs(gt->uncore); u32 timeout_ts; u32 ident; @@ -148,7 +148,7 @@ gen11_gt_identity_handler(struct intel_gt *gt, const u32 identity) static void gen11_gt_bank_handler(struct intel_gt *gt, const unsigned int bank) { - void __iomem * const regs = gt->uncore->regs; + void __iomem * const regs = intel_uncore_regs(gt->uncore); unsigned long intr_dw; unsigned int bit; @@ -183,7 +183,7 @@ void gen11_gt_irq_handler(struct intel_gt *gt, const u32 master_ctl) bool gen11_gt_reset_one_iir(struct intel_gt *gt, const unsigned int bank, const unsigned int bit) { - void __iomem * const regs = gt->uncore->regs; + void __iomem * const regs = intel_uncore_regs(gt->uncore); u32 dw; lockdep_assert_held(gt->irq_lock); @@ -404,7 +404,7 @@ void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl) { - void __iomem * const regs = gt->uncore->regs; + void __iomem * const regs = intel_uncore_regs(gt->uncore); u32 iir; if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 0b414eae1683..bf4a933de03a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -4,7 +4,7 @@ */ #include "i915_drv.h" - +#include "intel_gt.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" #include "intel_gt_regs.h" @@ -166,8 +166,8 @@ void intel_gt_mcr_init(struct intel_gt *gt) gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table; } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { /* Wa_14016747170 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK, intel_uncore_read(gt->uncore, MTL_GT_ACTIVITY_FACTOR)); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 357e2f865727..f900cc68d6d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -290,7 +290,6 @@ static int mtl_drpc(struct seq_file *m) seq_puts(m, "RC6\n"); break; default: - MISSING_CASE(REG_FIELD_GET(MTL_CC_MASK, gt_core_status)); seq_puts(m, "Unknown\n"); break; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 718cb2c80f79..cca4bac8f8b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -26,7 +26,7 @@ #define MTL_CAGF_MASK REG_GENMASK(8, 0) #define MTL_CC0 0x0 #define MTL_CC6 0x3 -#define MTL_CC_MASK REG_GENMASK(12, 9) +#define MTL_CC_MASK REG_GENMASK(10, 9) /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0xd00) @@ -164,6 +164,8 @@ #define GEN9_CSFE_CHICKEN1_RCS _MMIO(0x20d4) #define GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2) #define GEN11_ENABLE_32_PLANE_MODE (1 << 7) +#define GEN12_CS_DEBUG_MODE2 _MMIO(0x20d8) +#define INSTRUCTION_STATE_CACHE_INVALIDATE REG_BIT(6) #define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) #define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1 << 14) @@ -332,9 +334,11 @@ #define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4) #define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4) #define BSD_HWS_PGA_GEN7 _MMIO(0x4180) -#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208) -#define GEN12_VD0_AUX_NV _MMIO(0x4218) -#define GEN12_VD1_AUX_NV _MMIO(0x4228) + +#define GEN12_CCS_AUX_INV _MMIO(0x4208) +#define GEN12_VD0_AUX_INV _MMIO(0x4218) +#define GEN12_VE0_AUX_INV _MMIO(0x4238) +#define GEN12_BCS0_AUX_INV _MMIO(0x4248) #define GEN8_RTCR _MMIO(0x4260) #define GEN8_M1TCR _MMIO(0x4264) @@ -342,14 +346,12 @@ #define GEN8_BTCR _MMIO(0x426c) #define GEN8_VTCR _MMIO(0x4270) -#define GEN12_VD2_AUX_NV _MMIO(0x4298) -#define GEN12_VD3_AUX_NV _MMIO(0x42a8) -#define GEN12_VE0_AUX_NV _MMIO(0x4238) - #define BLT_HWS_PGA_GEN7 _MMIO(0x4280) -#define GEN12_VE1_AUX_NV _MMIO(0x42b8) +#define GEN12_VD2_AUX_INV _MMIO(0x4298) +#define GEN12_CCS0_AUX_INV _MMIO(0x42c8) #define AUX_INV REG_BIT(0) + #define VEBOX_HWS_PGA_GEN7 _MMIO(0x4380) #define GEN12_AUX_ERR_DBG _MMIO(0x43f4) @@ -412,9 +414,6 @@ #define XEHP_CULLBIT1 MCR_REG(0x6100) -#define CHICKEN_RASTER_1 MCR_REG(0x6204) -#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) - #define CHICKEN_RASTER_2 MCR_REG(0x6208) #define TBIMR_FAST_CLIP REG_BIT(5) @@ -1221,6 +1220,8 @@ #define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) +#define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) + #define ICL_HDC_MODE MCR_REG(0xe5f4) #define EU_PERF_CNTL2 PERF_REG(0xe658) @@ -1231,6 +1232,7 @@ #define DISABLE_D8_D16_COASLESCE REG_BIT(30) #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) #define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4) +#define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32) #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index ee2b44f896a2..f0dea54880af 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -701,6 +701,80 @@ static const struct attribute *media_perf_power_attrs[] = { }; static ssize_t +rps_up_threshold_pct_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_rps *rps = >->rps; + + return sysfs_emit(buf, "%u\n", intel_rps_get_up_threshold(rps)); +} + +static ssize_t +rps_up_threshold_pct_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_rps *rps = >->rps; + int ret; + u8 val; + + ret = kstrtou8(buf, 10, &val); + if (ret) + return ret; + + ret = intel_rps_set_up_threshold(rps, val); + + return ret == 0 ? count : ret; +} + +static struct kobj_attribute rps_up_threshold_pct = + __ATTR(rps_up_threshold_pct, + 0664, + rps_up_threshold_pct_show, + rps_up_threshold_pct_store); + +static ssize_t +rps_down_threshold_pct_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_rps *rps = >->rps; + + return sysfs_emit(buf, "%u\n", intel_rps_get_down_threshold(rps)); +} + +static ssize_t +rps_down_threshold_pct_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_rps *rps = >->rps; + int ret; + u8 val; + + ret = kstrtou8(buf, 10, &val); + if (ret) + return ret; + + ret = intel_rps_set_down_threshold(rps, val); + + return ret == 0 ? count : ret; +} + +static struct kobj_attribute rps_down_threshold_pct = + __ATTR(rps_down_threshold_pct, + 0664, + rps_down_threshold_pct_show, + rps_down_threshold_pct_store); + +static const struct attribute * const gen6_gt_rps_attrs[] = { + &rps_up_threshold_pct.attr, + &rps_down_threshold_pct.attr, + NULL +}; + +static ssize_t default_min_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct intel_gt *gt = kobj_to_gt(kobj->parent); @@ -722,9 +796,37 @@ default_max_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, cha static struct kobj_attribute default_max_freq_mhz = __ATTR(rps_max_freq_mhz, 0444, default_max_freq_mhz_show, NULL); +static ssize_t +default_rps_up_threshold_pct_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct intel_gt *gt = kobj_to_gt(kobj->parent); + + return sysfs_emit(buf, "%u\n", gt->defaults.rps_up_threshold); +} + +static struct kobj_attribute default_rps_up_threshold_pct = +__ATTR(rps_up_threshold_pct, 0444, default_rps_up_threshold_pct_show, NULL); + +static ssize_t +default_rps_down_threshold_pct_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct intel_gt *gt = kobj_to_gt(kobj->parent); + + return sysfs_emit(buf, "%u\n", gt->defaults.rps_down_threshold); +} + +static struct kobj_attribute default_rps_down_threshold_pct = +__ATTR(rps_down_threshold_pct, 0444, default_rps_down_threshold_pct_show, NULL); + static const struct attribute * const rps_defaults_attrs[] = { &default_min_freq_mhz.attr, &default_max_freq_mhz.attr, + &default_rps_up_threshold_pct.attr, + &default_rps_down_threshold_pct.attr, NULL }; @@ -752,6 +854,12 @@ static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj) if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) ret = sysfs_create_file(kobj, vlv_attr); + if (is_object_gt(kobj) && !intel_uc_uses_guc_slpc(>->uc)) { + ret = sysfs_create_files(kobj, gen6_gt_rps_attrs); + if (ret) + return ret; + } + return ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index f08c2556aa25..def7dd0eb6f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -83,6 +83,9 @@ enum intel_submission_method { struct gt_defaults { u32 min_freq; u32 max_freq; + + u8 rps_up_threshold; + u8 rps_down_threshold; }; enum intel_gt_type { @@ -306,4 +309,6 @@ enum intel_gt_scratch_field { INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256, }; +#define intel_gt_support_legacy_fencing(gt) ((gt)->ggtt->num_fences > 0) + #endif /* __INTEL_GT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 731d9f2bbc56..13944a14ea2d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -89,7 +89,7 @@ int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj) enum i915_map_type type; void *vaddr; - type = i915_coherent_map_type(vm->i915, obj, true); + type = intel_gt_coherent_map_type(vm->gt, obj, true); vaddr = i915_gem_object_pin_map_unlocked(obj, type); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -103,7 +103,7 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object enum i915_map_type type; void *vaddr; - type = i915_coherent_map_type(vm->i915, obj, true); + type = intel_gt_coherent_map_type(vm->gt, obj, true); vaddr = i915_gem_object_pin_map(obj, type); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 4d6296cdbcfd..346ec8ec2edd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -35,9 +35,9 @@ #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT) -#define DBG(...) trace_printk(__VA_ARGS__) +#define GTT_TRACE(...) trace_printk(__VA_ARGS__) #else -#define DBG(...) +#define GTT_TRACE(...) #endif #define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a4ec20aaafe2..eaf66d903166 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -845,6 +845,27 @@ lrc_setup_indirect_ctx(u32 *regs, lrc_ring_indirect_offset_default(engine) << 6; } +static bool ctx_needs_runalone(const struct intel_context *ce) +{ + struct i915_gem_context *gem_ctx; + bool ctx_is_protected = false; + + /* + * On MTL and newer platforms, protected contexts require setting + * the LRC run-alone bit or else the encryption will not happen. + */ + if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) && + (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) { + rcu_read_lock(); + gem_ctx = rcu_dereference(ce->gem_context); + if (gem_ctx) + ctx_is_protected = gem_ctx->uses_protected_content; + rcu_read_unlock(); + } + + return ctx_is_protected; +} + static void init_common_regs(u32 * const regs, const struct intel_context *ce, const struct intel_engine_cs *engine, @@ -860,6 +881,8 @@ static void init_common_regs(u32 * const regs, if (GRAPHICS_VER(engine->i915) < 11) ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | CTX_CTRL_RS_CTX_ENABLE); + if (ctx_needs_runalone(ce)) + ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE); regs[CTX_CONTEXT_CONTROL] = ctl; regs[CTX_TIMESTAMP] = ce->stats.runtime.last; @@ -1092,10 +1115,19 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) obj = i915_gem_object_create_lmem(engine->i915, context_size, I915_BO_ALLOC_PM_VOLATILE); - if (IS_ERR(obj)) + if (IS_ERR(obj)) { obj = i915_gem_object_create_shmem(engine->i915, context_size); - if (IS_ERR(obj)) - return ERR_CAST(obj); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Wa_22016122933: For Media version 13.0, all Media GT shared + * memory needs to be mapped as WC on CPU side and UC (PAT + * index 2) on GPU side. + */ + if (intel_gt_needs_wa_22016122933(engine->gt)) + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + } vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { @@ -1184,9 +1216,9 @@ lrc_pre_pin(struct intel_context *ce, GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); *vaddr = i915_gem_object_pin_map(ce->state->obj, - i915_coherent_map_type(ce->engine->i915, - ce->state->obj, - false) | + intel_gt_coherent_map_type(ce->engine->gt, + ce->state->obj, + false) | I915_MAP_OVERRIDE); return PTR_ERR_OR_ZERO(*vaddr); @@ -1308,29 +1340,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) } /* - * On DG2 during context restore of a preempted context in GPGPU mode, - * RCS restore hang is detected. This is extremely timing dependent. - * To address this below sw wabb is implemented for DG2 A steppings. - */ -static u32 * -dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) -{ - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base)); - *cs++ = 0x21; - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1); - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2); - - return cs; -} - -/* * The bspec's tuning guide asks us to program a vertical watermark value of * 0x3FF. However this register is not saved/restored properly by the * hardware, so we're required to apply the desired value via INDIRECT_CTX @@ -1348,30 +1357,34 @@ dg2_emit_draw_watermark_setting(u32 *cs) } static u32 * +gen12_invalidate_state_cache(u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2); + *cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); + return cs; +} + +static u32 * gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) { cs = gen12_emit_timestamp_wa(ce, cs); cs = gen12_emit_cmd_buf_wa(ce, cs); cs = gen12_emit_restore_scratch(ce, cs); - /* Wa_22011450934:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0)) - cs = dg2_emit_rcs_hang_wabb(ce, cs); - /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); - /* hsdes: 1809175790 */ - if (!HAS_FLAT_CCS(ce->engine->i915)) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_GFX_CCS_AUX_NV); + cs = gen12_emit_aux_table_inv(ce->engine, cs); + + /* Wa_18022495364 */ + if (IS_GFX_GT_IP_RANGE(ce->engine->gt, IP_VER(12, 0), IP_VER(12, 10))) + cs = gen12_invalidate_state_cache(cs); /* Wa_16014892111 */ - if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_DG2(ce->engine->i915)) cs = dg2_emit_draw_watermark_setting(cs); @@ -1385,24 +1398,13 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_restore_scratch(ce, cs); /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) if (ce->engine->class == COMPUTE_CLASS) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); - /* hsdes: 1809175790 */ - if (!HAS_FLAT_CCS(ce->engine->i915)) { - if (ce->engine->class == VIDEO_DECODE_CLASS) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_VD0_AUX_NV); - else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_VE0_AUX_NV); - } - - return cs; + return gen12_emit_aux_table_inv(ce->engine, cs); } static void diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 6023288b0e2d..576e5ef0289b 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -366,7 +366,7 @@ static int emit_pte(struct i915_request *rq, u64 offset, int length) { - bool has_64K_pages = HAS_64K_PAGES(rq->engine->i915); + bool has_64K_pages = HAS_64K_PAGES(rq->i915); const u64 encode = rq->context->vm->pte_encode(0, pat_index, is_lmem ? PTE_LM : 0); struct intel_ring *ring = rq->ring; @@ -375,7 +375,7 @@ static int emit_pte(struct i915_request *rq, u32 page_size; u32 *hdr, *cs; - GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8); + GEM_BUG_ON(GRAPHICS_VER(rq->i915) < 8); page_size = I915_GTT_PAGE_SIZE; dword_length = 0x400; @@ -531,7 +531,7 @@ static int emit_copy_ccs(struct i915_request *rq, u32 dst_offset, u8 dst_access, u32 src_offset, u8 src_access, int size) { - struct drm_i915_private *i915 = rq->engine->i915; + struct drm_i915_private *i915 = rq->i915; int mocs = rq->engine->gt->mocs.uc_index << 1; u32 num_ccs_blks; u32 *cs; @@ -581,7 +581,7 @@ static int emit_copy_ccs(struct i915_request *rq, static int emit_copy(struct i915_request *rq, u32 dst_offset, u32 src_offset, int size) { - const int ver = GRAPHICS_VER(rq->engine->i915); + const int ver = GRAPHICS_VER(rq->i915); u32 instance = rq->engine->instance; u32 *cs; @@ -917,7 +917,7 @@ out_ce: static int emit_clear(struct i915_request *rq, u32 offset, int size, u32 value, bool is_lmem) { - struct drm_i915_private *i915 = rq->engine->i915; + struct drm_i915_private *i915 = rq->i915; int mocs = rq->engine->gt->mocs.uc_index << 1; const int ver = GRAPHICS_VER(i915); int ring_sz; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 2c014407225c..07269ff3be13 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -404,18 +404,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; -static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { - /* Wa_14011441408: Set Go to Memory for MOCS#0 */ - MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Coherent; GO:Memory */ - MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Non-Coherent; GO:Memory */ - MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), - - /* WB - LC */ - MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), -}; - static const struct drm_i915_mocs_entry pvc_mocs_table[] = { /* Error */ MOCS_ENTRY(0, 0, L3_3_WB), @@ -507,7 +495,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_METEORLAKE(i915)) { + if (IS_GFX_GT_IP_RANGE(&i915->gt0, IP_VER(12, 70), IP_VER(12, 71))) { table->size = ARRAY_SIZE(mtl_mocs_table); table->table = mtl_mocs_table; table->n_entries = MTL_NUM_MOCS_ENTRIES; @@ -521,13 +509,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->wb_index = 2; table->unused_entries_index = 2; } else if (IS_DG2(i915)) { - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); - table->table = dg2_mocs_table_g10_ax; - } else { - table->size = ARRAY_SIZE(dg2_mocs_table); - table->table = dg2_mocs_table; - } + table->size = ARRAY_SIZE(dg2_mocs_table); + table->table = dg2_mocs_table; table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->unused_entries_index = 3; diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 436756bfbb1a..d07a4f97b943 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -8,6 +8,7 @@ #include "gem/i915_gem_lmem.h" #include "i915_trace.h" +#include "intel_gt.h" #include "intel_gtt.h" #include "gen6_ppgtt.h" #include "gen8_ppgtt.h" @@ -210,8 +211,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm, return; vm->clear_range(vm, vma_res->start, vma_res->vma_size); - if (vma_res->tlb) - vma_invalidate_tlb(vm, vma_res->tlb); + vma_invalidate_tlb(vm, vma_res->tlb); } static unsigned long pd_count(u64 size, int shift) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 58bb1c55294c..8b67abd720be 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -118,14 +118,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_EI_MODE(1); /* - * Wa_16011777198 and BSpec 52698 - Render powergating must be off. + * BSpec 52698 - Render powergating must be off. * FIXME BSpec is outdated, disabling powergating for MTL is just * temporary wa and should be removed after fixing real cause * of forcewake timeouts. */ - if (IS_METEORLAKE(gt->i915) || - IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) pg_enable = GEN9_MEDIA_PG_ENABLE | GEN11_MEDIA_SAMPLER_PG_ENABLE; @@ -526,8 +524,7 @@ static bool rc6_supported(struct intel_rc6 *rc6) return false; } - if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) && - gt->type == GT_MEDIA) { + if (IS_MEDIA_GT_IP_STEP(gt, IP_VER(13, 0), STEP_A0, STEP_B0)) { drm_notice(&i915->drm, "Media RC6 disabled on A step\n"); return false; diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 2a3217e2890f..f8512aee58a8 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -220,7 +220,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) resource_size_t lmem_range; u64 tile_stolen, flat_ccs_base; - lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0xFFFF; + lmem_range = intel_gt_mcr_read_any(to_gt(i915), XEHP_TILE0_ADDR_RANGE) & 0xFFFF; lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT; lmem_size *= SZ_1G; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index e2152f75ba2e..a21e939fdbf6 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -35,9 +35,6 @@ #define RESET_MAX_RETRIES 3 -/* XXX How to handle concurrent GGTT updates using tiling registers? */ -#define RESET_UNDER_STOP_MACHINE 0 - static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) { struct drm_i915_file_private *file_priv = ctx->file_priv; @@ -164,16 +161,16 @@ static int i915_do_reset(struct intel_gt *gt, struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev); int err; - /* Assert reset for at least 20 usec, and wait for acknowledgement. */ + /* Assert reset for at least 50 usec, and wait for acknowledgement. */ pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); udelay(50); - err = wait_for_atomic(i915_in_reset(pdev), 50); + err = _wait_for_atomic(i915_in_reset(pdev), 50000, 0); /* Clear the reset request. */ pci_write_config_byte(pdev, I915_GDRST, 0); udelay(50); if (!err) - err = wait_for_atomic(!i915_in_reset(pdev), 50); + err = _wait_for_atomic(!i915_in_reset(pdev), 50000, 0); return err; } @@ -193,7 +190,7 @@ static int g33_do_reset(struct intel_gt *gt, struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev); pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - return wait_for_atomic(g4x_reset_complete(pdev), 50); + return _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0); } static int g4x_do_reset(struct intel_gt *gt, @@ -210,7 +207,7 @@ static int g4x_do_reset(struct intel_gt *gt, pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); - ret = wait_for_atomic(g4x_reset_complete(pdev), 50); + ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0); if (ret) { GT_TRACE(gt, "Wait for media reset failed\n"); goto out; @@ -218,7 +215,7 @@ static int g4x_do_reset(struct intel_gt *gt, pci_write_config_byte(pdev, I915_GDRST, GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for_atomic(g4x_reset_complete(pdev), 50); + ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0); if (ret) { GT_TRACE(gt, "Wait for render reset failed\n"); goto out; @@ -708,7 +705,7 @@ static int __reset_guc(struct intel_gt *gt) static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask) { - if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0)) + if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0)) return false; if (!__HAS_ENGINE(engine_mask, GSC0)) @@ -788,9 +785,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) reset_mask = wa_14015076503_start(gt, engine_mask, !retry); GT_TRACE(gt, "engine_mask=%x\n", reset_mask); - preempt_disable(); ret = reset(gt, reset_mask, retry); - preempt_enable(); wa_14015076503_end(gt, reset_mask); } @@ -1635,6 +1630,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w) w->gt = NULL; } +/* + * Wa_22011802037 requires that we (or the GuC) ensure that no command + * streamers are executing MI_FORCE_WAKE while an engine reset is initiated. + */ +bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt) +{ + if (GRAPHICS_VER(gt->i915) < 11) + return false; + + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)) + return true; + + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + return false; + + return true; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_reset.c" #include "selftest_hangcheck.c" diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 25c975b6e8fc..f615b30b81c5 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -78,4 +78,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w); bool intel_has_gpu_reset(const struct intel_gt *gt); bool intel_has_reset_engine(const struct intel_gt *gt); +bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt); + #endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index fb99143be98e..59da4b7bd262 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -13,6 +13,7 @@ #include "intel_engine_regs.h" #include "intel_gpu_commands.h" #include "intel_ring.h" +#include "intel_gt.h" #include "intel_timeline.h" unsigned int intel_ring_update_space(struct intel_ring *ring) @@ -56,7 +57,7 @@ int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww) if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) { addr = (void __force *)i915_vma_pin_iomap(vma); } else { - int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false); + int type = intel_gt_coherent_map_type(vma->vm->gt, vma->obj, false); addr = i915_gem_object_pin_map(vma->obj, type); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 3fd795c3263f..92085ffd23de 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -805,7 +805,7 @@ static int mi_set_context(struct i915_request *rq, static int remap_l3_slice(struct i915_request *rq, int slice) { #define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32)) - u32 *cs, *remap_info = rq->engine->i915->l3_parity.remap_info[slice]; + u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; int i; if (!remap_info) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index e92e626d4994..4feef874e6d6 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -16,7 +16,9 @@ #include "intel_gt.h" #include "intel_gt_clock_utils.h" #include "intel_gt_irq.h" +#include "intel_gt_pm.h" #include "intel_gt_pm_irq.h" +#include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_mchbar_regs.h" #include "intel_pcode.h" @@ -672,7 +674,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power) { struct intel_gt *gt = rps_to_gt(rps); struct intel_uncore *uncore = gt->uncore; - u32 threshold_up = 0, threshold_down = 0; /* in % */ u32 ei_up = 0, ei_down = 0; lockdep_assert_held(&rps->power.mutex); @@ -680,9 +681,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power) if (new_power == rps->power.mode) return; - threshold_up = 95; - threshold_down = 85; - /* Note the units here are not exactly 1us, but 1280ns. */ switch (new_power) { case LOW_POWER: @@ -709,17 +707,22 @@ static void rps_set_power(struct intel_rps *rps, int new_power) GT_TRACE(gt, "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n", - new_power, threshold_up, ei_up, threshold_down, ei_down); + new_power, + rps->power.up_threshold, ei_up, + rps->power.down_threshold, ei_down); set(uncore, GEN6_RP_UP_EI, intel_gt_ns_to_pm_interval(gt, ei_up * 1000)); set(uncore, GEN6_RP_UP_THRESHOLD, - intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10)); + intel_gt_ns_to_pm_interval(gt, + ei_up * rps->power.up_threshold * 10)); set(uncore, GEN6_RP_DOWN_EI, intel_gt_ns_to_pm_interval(gt, ei_down * 1000)); set(uncore, GEN6_RP_DOWN_THRESHOLD, - intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10)); + intel_gt_ns_to_pm_interval(gt, + ei_down * + rps->power.down_threshold * 10)); set(uncore, GEN6_RP_CONTROL, (GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | @@ -731,8 +734,6 @@ static void rps_set_power(struct intel_rps *rps, int new_power) skip_hw_write: rps->power.mode = new_power; - rps->power.up_threshold = threshold_up; - rps->power.down_threshold = threshold_down; } static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) @@ -1160,7 +1161,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c { struct drm_i915_private *i915 = rps_to_i915(rps); - if (IS_METEORLAKE(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) return mtl_get_freq_caps(rps, caps); else return __gen6_rps_get_freq_caps(rps, caps); @@ -1559,10 +1560,12 @@ void intel_rps_enable(struct intel_rps *rps) return; GT_TRACE(rps_to_gt(rps), - "min:%x, max:%x, freq:[%d, %d]\n", + "min:%x, max:%x, freq:[%d, %d], thresholds:[%u, %u]\n", rps->min_freq, rps->max_freq, intel_gpu_freq(rps, rps->min_freq), - intel_gpu_freq(rps, rps->max_freq)); + intel_gpu_freq(rps, rps->max_freq), + rps->power.up_threshold, + rps->power.down_threshold); GEM_BUG_ON(rps->max_freq < rps->min_freq); GEM_BUG_ON(rps->idle_freq > rps->max_freq); @@ -2015,6 +2018,12 @@ void intel_rps_init(struct intel_rps *rps) } } + /* Set default thresholds in % */ + rps->power.up_threshold = 95; + rps_to_gt(rps)->defaults.rps_up_threshold = rps->power.up_threshold; + rps->power.down_threshold = 85; + rps_to_gt(rps)->defaults.rps_down_threshold = rps->power.down_threshold; + /* Finally allow us to boost to max by default */ rps->boost_freq = rps->max_freq; rps->idle_freq = rps->min_freq; @@ -2569,6 +2578,58 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) return set_min_freq(rps, val); } +u8 intel_rps_get_up_threshold(struct intel_rps *rps) +{ + return rps->power.up_threshold; +} + +static int rps_set_threshold(struct intel_rps *rps, u8 *threshold, u8 val) +{ + int ret; + + if (val > 100) + return -EINVAL; + + ret = mutex_lock_interruptible(&rps->lock); + if (ret) + return ret; + + if (*threshold == val) + goto out_unlock; + + *threshold = val; + + /* Force reset. */ + rps->last_freq = -1; + mutex_lock(&rps->power.mutex); + rps->power.mode = -1; + mutex_unlock(&rps->power.mutex); + + intel_rps_set(rps, clamp(rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit)); + +out_unlock: + mutex_unlock(&rps->lock); + + return ret; +} + +int intel_rps_set_up_threshold(struct intel_rps *rps, u8 threshold) +{ + return rps_set_threshold(rps, &rps->power.up_threshold, threshold); +} + +u8 intel_rps_get_down_threshold(struct intel_rps *rps) +{ + return rps->power.down_threshold; +} + +int intel_rps_set_down_threshold(struct intel_rps *rps, u8 threshold) +{ + return rps_set_threshold(rps, &rps->power.down_threshold, threshold); +} + static void intel_rps_set_manual(struct intel_rps *rps, bool enable) { struct intel_uncore *uncore = rps_to_uncore(rps); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index a3fa987aa91f..92fb01f5a452 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -37,6 +37,10 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); int intel_gpu_freq(struct intel_rps *rps, int val); int intel_freq_opcode(struct intel_rps *rps, int val); +u8 intel_rps_get_up_threshold(struct intel_rps *rps); +int intel_rps_set_up_threshold(struct intel_rps *rps, u8 threshold); +u8 intel_rps_get_down_threshold(struct intel_rps *rps); +int intel_rps_set_down_threshold(struct intel_rps *rps, u8 threshold); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps); u32 intel_rps_get_requested_frequency(struct intel_rps *rps); diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c b/drivers/gpu/drm/i915/gt/intel_sa_media.c index e8f3d18c12b8..8c1dbcbcbc4f 100644 --- a/drivers/gpu/drm/i915/gt/intel_sa_media.c +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c @@ -29,7 +29,7 @@ int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, * Standalone media shares the general MMIO space with the primary * GT. We'll re-use the primary GT's mapping. */ - uncore->regs = i915->uncore.regs; + uncore->regs = intel_uncore_regs(&i915->uncore); if (drm_WARN_ON(&i915->drm, uncore->regs == NULL)) return -EIO; diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 1141f875f5bd..f602895f6d0d 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -302,7 +302,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt) u8 eu_en; u8 s_en; - if (IS_JSL_EHL(gt->i915)) + if (IS_JASPERLAKE(gt->i915) || IS_ELKHARTLAKE(gt->i915)) intel_sseu_set_info(sseu, 1, 4, 8); else intel_sseu_set_info(sseu, 1, 8, 8); diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c new file mode 100644 index 000000000000..139608c30d97 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_tlb.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_perf_oa_regs.h" +#include "intel_engine_pm.h" +#include "intel_gt.h" +#include "intel_gt_mcr.h" +#include "intel_gt_pm.h" +#include "intel_gt_print.h" +#include "intel_gt_regs.h" +#include "intel_tlb.h" + +/* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ +#define TLB_INVAL_TIMEOUT_US 100 +#define TLB_INVAL_TIMEOUT_MS 4 + +/* + * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets + * but are now considered MCR registers. Since they exist within a GAM range, + * the primary instance of the register rolls up the status from each unit. + */ +static int wait_for_invalidate(struct intel_engine_cs *engine) +{ + if (engine->tlb_inv.mcr) + return intel_gt_mcr_wait_for_reg(engine->gt, + engine->tlb_inv.reg.mcr_reg, + engine->tlb_inv.done, + 0, + TLB_INVAL_TIMEOUT_US, + TLB_INVAL_TIMEOUT_MS); + else + return __intel_wait_for_register_fw(engine->gt->uncore, + engine->tlb_inv.reg.reg, + engine->tlb_inv.done, + 0, + TLB_INVAL_TIMEOUT_US, + TLB_INVAL_TIMEOUT_MS, + NULL); +} + +static void mmio_invalidate_full(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + intel_engine_mask_t awake, tmp; + enum intel_engine_id id; + unsigned long flags; + + if (GRAPHICS_VER(i915) < 8) + return; + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + intel_gt_mcr_lock(gt, &flags); + spin_lock(&uncore->lock); /* serialise invalidate with GT reset */ + + awake = 0; + for_each_engine(engine, gt, id) { + if (!intel_engine_pm_is_awake(engine)) + continue; + + if (engine->tlb_inv.mcr) + intel_gt_mcr_multicast_write_fw(gt, + engine->tlb_inv.reg.mcr_reg, + engine->tlb_inv.request); + else + intel_uncore_write_fw(uncore, + engine->tlb_inv.reg.reg, + engine->tlb_inv.request); + + awake |= engine->mask; + } + + GT_TRACE(gt, "invalidated engines %08x\n", awake); + + /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */ + if (awake && + (IS_TIGERLAKE(i915) || + IS_DG1(i915) || + IS_ROCKETLAKE(i915) || + IS_ALDERLAKE_S(i915) || + IS_ALDERLAKE_P(i915))) + intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1); + + spin_unlock(&uncore->lock); + intel_gt_mcr_unlock(gt, flags); + + for_each_engine_masked(engine, gt, awake, tmp) { + if (wait_for_invalidate(engine)) + gt_err_ratelimited(gt, + "%s TLB invalidation did not complete in %ums!\n", + engine->name, TLB_INVAL_TIMEOUT_MS); + } + + /* + * Use delayed put since a) we mostly expect a flurry of TLB + * invalidations so it is good to avoid paying the forcewake cost and + * b) it works around a bug in Icelake which cannot cope with too rapid + * transitions. + */ + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); +} + +static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno) +{ + u32 cur = intel_gt_tlb_seqno(gt); + + /* Only skip if a *full* TLB invalidate barrier has passed */ + return (s32)(cur - ALIGN(seqno, 2)) > 0; +} + +void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno) +{ + intel_wakeref_t wakeref; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (intel_gt_is_wedged(gt)) + return; + + if (tlb_seqno_passed(gt, seqno)) + return; + + with_intel_gt_pm_if_awake(gt, wakeref) { + mutex_lock(>->tlb.invalidate_lock); + if (tlb_seqno_passed(gt, seqno)) + goto unlock; + + mmio_invalidate_full(gt); + + write_seqcount_invalidate(>->tlb.seqno); +unlock: + mutex_unlock(>->tlb.invalidate_lock); + } +} + +void intel_gt_init_tlb(struct intel_gt *gt) +{ + mutex_init(>->tlb.invalidate_lock); + seqcount_mutex_init(>->tlb.seqno, >->tlb.invalidate_lock); +} + +void intel_gt_fini_tlb(struct intel_gt *gt) +{ + mutex_destroy(>->tlb.invalidate_lock); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_tlb.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.h b/drivers/gpu/drm/i915/gt/intel_tlb.h new file mode 100644 index 000000000000..337327af92ac --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_tlb.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef INTEL_TLB_H +#define INTEL_TLB_H + +#include <linux/seqlock.h> +#include <linux/types.h> + +#include "intel_gt_types.h" + +void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno); + +void intel_gt_init_tlb(struct intel_gt *gt); +void intel_gt_fini_tlb(struct intel_gt *gt); + +static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt) +{ + return seqprop_sequence(>->tlb.seqno); +} + +static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt) +{ + return intel_gt_tlb_seqno(gt) | 1; +} + +#endif /* INTEL_TLB_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 4d2dece96011..b86a10b1f534 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -123,6 +123,22 @@ static void wa_init_finish(struct i915_wa_list *wal) wal->wa_count, wal->name, wal->engine_name); } +static enum forcewake_domains +wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) +{ + enum forcewake_domains fw = 0; + struct i915_wa *wa; + unsigned int i; + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + fw |= intel_uncore_forcewake_for_reg(uncore, + wa->reg, + FW_REG_READ | + FW_REG_WRITE); + + return fw; +} + static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) { unsigned int addr = i915_mmio_reg_offset(wa->reg); @@ -225,13 +241,13 @@ static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg, static void wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) { - wa_add(wal, reg, clear, set, clear, false); + wa_add(wal, reg, clear, set, clear | set, false); } static void wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set) { - wa_mcr_add(wal, reg, clear, set, clear, false); + wa_mcr_add(wal, reg, clear, set, clear | set, false); } static void @@ -404,7 +420,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, /* WaForceContextSaveRestoreNonCoherent:bdw */ HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); + (IS_BROADWELL_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); } static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -584,7 +600,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, gen9_ctx_workarounds_init(engine, wal); /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER)) + if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER)) wa_masked_en(wal, COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -621,10 +637,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { /* Wa_1406697149 (WaDisableBankHangMode:icl) */ - wa_write(wal, - GEN8_L3CNTLREG, - intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | - GEN8_ERRDETBCTRL); + wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL); /* WaForceEnableNonCoherent:icl * This is not the same workaround as in early Gen9 platforms, where @@ -653,7 +666,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_1604278689:icl,ehl */ wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID); wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER, - 0, /* write-only register; skip validation */ + 0, 0xFFFFFFFF); /* Wa_1406306137:icl,ehl */ @@ -670,38 +683,8 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); - wa_mcr_add(wal, - XEHP_FF_MODE2, - FF_MODE2_TDS_TIMER_MASK, - FF_MODE2_TDS_TIMER_128, - 0, false); -} - -/* - * These settings aren't actually workarounds, but general tuning settings that - * need to be programmed on several platforms. - */ -static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) -{ - /* - * Although some platforms refer to it as Wa_1604555607, we need to - * program it even on those that don't explicitly list that - * workaround. - * - * Note that the programming of this register is further modified - * according to the FF_MODE2 guidance given by Wa_1608008084:gen12. - * Wa_1608008084 tells us the FF_MODE2 register will return the wrong - * value when read. The default value for this register is zero for all - * fields and there are no bit masks. So instead of doing a RMW we - * should just write TDS timer value. For the same reason read - * verification is ignored. - */ - wa_add(wal, - GEN12_FF_MODE2, - FF_MODE2_TDS_TIMER_MASK, - FF_MODE2_TDS_TIMER_128, - 0, false); + wa_mcr_write_clr_set(wal, XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128); } static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -709,8 +692,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, { struct drm_i915_private *i915 = engine->i915; - gen12_ctx_gt_tuning_init(engine, wal); - /* * Wa_1409142259:tgl,dg1,adl-p * Wa_1409347922:tgl,dg1,adl-p @@ -732,15 +713,27 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); /* - * Wa_16011163337 + * Wa_16011163337 - GS_TIMER + * + * TDS_TIMER: Although some platforms refer to it as Wa_1604555607, we + * need to program it even on those that don't explicitly list that + * workaround. * - * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due - * to Wa_1608008084. + * Note that the programming of GEN12_FF_MODE2 is further modified + * according to the FF_MODE2 guidance given by Wa_1608008084. + * Wa_1608008084 tells us the FF_MODE2 register will return the wrong + * value when read from the CPU. + * + * The default value for this register is zero for all fields. + * So instead of doing a RMW we should just write the desired values + * for TDS and GS timers. Note that since the readback can't be trusted, + * the clear mask is just set to ~0 to make sure other bits are not + * inadvertently set. For the same reason read verification is ignored. */ wa_add(wal, GEN12_FF_MODE2, - FF_MODE2_GS_TIMER_MASK, - FF_MODE2_GS_TIMER_224, + ~0, + FF_MODE2_TDS_TIMER_128 | FF_MODE2_GS_TIMER_224, 0, false); if (!IS_DG1(i915)) { @@ -771,68 +764,41 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, { dg2_ctx_gt_tuning_init(engine, wal); - /* Wa_16011186671:dg2_g11 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { - wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); - wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); - } - - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { - /* Wa_14010469329:dg2_g10 */ - wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); - - /* - * Wa_22010465075:dg2_g10 - * Wa_22010613112:dg2_g10 - * Wa_14010698770:dg2_g10 - */ - wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); - } - /* Wa_16013271637:dg2 */ wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, MSC_MSAA_REODER_BUF_BYPASS_DISABLE); /* Wa_14014947963:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) - wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); /* Wa_18018764978:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) - wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); - - /* Wa_15010599737:dg2 */ - wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); + wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); /* Wa_18019271663:dg2 */ wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); } -static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; dg2_ctx_gt_tuning_init(engine, wal); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); } -static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - mtl_ctx_gt_tuning_init(engine, wal); + xelpg_ctx_gt_tuning_init(engine, wal); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_14014947963 */ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); @@ -938,8 +904,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_METEORLAKE(i915)) - mtl_ctx_workarounds_init(engine, wal); + if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + xelpg_ctx_workarounds_init(engine, wal); else if (IS_PONTEVECCHIO(i915)) ; /* noop; none at this time */ else if (IS_DG2(i915)) @@ -987,6 +953,9 @@ void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) int intel_engine_emit_ctx_wa(struct i915_request *rq) { struct i915_wa_list *wal = &rq->engine->ctx_wa_list; + struct intel_uncore *uncore = rq->engine->uncore; + enum forcewake_domains fw; + unsigned long flags; struct i915_wa *wa; unsigned int i; u32 *cs; @@ -1003,13 +972,36 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) if (IS_ERR(cs)) return PTR_ERR(cs); + fw = wal_get_fw_for_rmw(uncore, wal); + + intel_gt_mcr_lock(wal->gt, &flags); + spin_lock(&uncore->lock); + intel_uncore_forcewake_get__locked(uncore, fw); + *cs++ = MI_LOAD_REGISTER_IMM(wal->count); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + u32 val; + + /* Skip reading the register if it's not really needed */ + if (wa->masked_reg || (wa->clr | wa->set) == U32_MAX) { + val = wa->set; + } else { + val = wa->is_mcr ? + intel_gt_mcr_read_any_fw(wal->gt, wa->mcr_reg) : + intel_uncore_read_fw(uncore, wa->reg); + val &= ~wa->clr; + val |= wa->set; + } + *cs++ = i915_mmio_reg_offset(wa->reg); - *cs++ = wa->set; + *cs++ = val; } *cs++ = MI_NOOP; + intel_uncore_forcewake_put__locked(uncore, fw); + spin_unlock(&uncore->lock); + intel_gt_mcr_unlock(wal->gt, flags); + intel_ring_advance(rq, cs); ret = rq->engine->emit_flush(rq, EMIT_BARRIER); @@ -1173,7 +1165,7 @@ skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0)) + if (IS_SKYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0)) wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); @@ -1185,7 +1177,7 @@ kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) gen9_gt_workarounds_init(gt, wal); /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0)) + if (IS_KABYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, 0, STEP_C0)) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); @@ -1441,7 +1433,8 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || - IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) + ((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) && + IS_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))) wa_write_or(wal, GEN11_SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1485,6 +1478,18 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); + + /* + * Wa_14015795083 + * + * Firmware on some gen12 platforms locks the MISCCPCTL register, + * preventing i915 from modifying it for this workaround. Skip the + * readback verification for this workaround on debug builds; if the + * workaround doesn't stick due to firmware behavior, it's not an error + * that we want CI to flag. + */ + wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE, + 0, 0, false); } static void @@ -1574,31 +1579,11 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - struct intel_engine_cs *engine; - int id; - xehp_init_mcr(gt, wal); /* Wa_14011060649:dg2 */ wa_14011060649(gt, wal); - /* - * Although there are per-engine instances of these registers, - * they technically exist outside the engine itself and are not - * impacted by engine resets. Furthermore, they're part of the - * GuC blacklist so trying to treat them as engine workarounds - * will result in GuC initialization failure and a wedged GPU. - */ - for_each_engine(engine, gt, id) { - if (engine->class != VIDEO_DECODE_CLASS) - continue; - - /* Wa_16010515920:dg2_g10 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) - wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base), - ALNUNIT_CLKGATE_DIS); - } - if (IS_DG2_G10(gt->i915)) { /* Wa_22010523718:dg2 */ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, @@ -1609,70 +1594,15 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) DSS_ROUTER_CLKGATE_DIS); } - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14012362059:dg2 */ - wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); - } - - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { - /* Wa_14010948348:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS); - - /* Wa_14011037102:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); - - /* Wa_14011371254:dg2_g10 */ - wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); - - /* Wa_14011431319:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | - GAMTLBVDBOX7_CLKGATE_DIS | - GAMTLBVDBOX6_CLKGATE_DIS | - GAMTLBVDBOX5_CLKGATE_DIS | - GAMTLBVDBOX4_CLKGATE_DIS | - GAMTLBVDBOX3_CLKGATE_DIS | - GAMTLBVDBOX2_CLKGATE_DIS | - GAMTLBVDBOX1_CLKGATE_DIS | - GAMTLBVDBOX0_CLKGATE_DIS | - GAMTLBKCR_CLKGATE_DIS | - GAMTLBGUC_CLKGATE_DIS | - GAMTLBBLT_CLKGATE_DIS); - wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | - GAMTLBGFXA1_CLKGATE_DIS | - GAMTLBCOMPA0_CLKGATE_DIS | - GAMTLBCOMPA1_CLKGATE_DIS | - GAMTLBCOMPB0_CLKGATE_DIS | - GAMTLBCOMPB1_CLKGATE_DIS | - GAMTLBCOMPC0_CLKGATE_DIS | - GAMTLBCOMPC1_CLKGATE_DIS | - GAMTLBCOMPD0_CLKGATE_DIS | - GAMTLBCOMPD1_CLKGATE_DIS | - GAMTLBMERT_CLKGATE_DIS | - GAMTLBVEBOX3_CLKGATE_DIS | - GAMTLBVEBOX2_CLKGATE_DIS | - GAMTLBVEBOX1_CLKGATE_DIS | - GAMTLBVEBOX0_CLKGATE_DIS); - - /* Wa_14010569222:dg2_g10 */ - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, - GAMEDIA_CLKGATE_DIS); - - /* Wa_14011028019:dg2_g10 */ - wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); - - /* Wa_14010680813:dg2_g10 */ - wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL, - CONTROL_BLOCK_CLKGATE_DIS | - EGRESS_BLOCK_CLKGATE_DIS | - TAG_BLOCK_CLKGATE_DIS); - } - /* Wa_14014830051:dg2 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); - /* Wa_14015795083 */ - wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); + /* + * Wa_14015795083 + * Skip verification for possibly locked register. + */ + wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE, + 0, 0, false); /* Wa_18018781329 */ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); @@ -1710,14 +1640,13 @@ static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* Wa_14018778641 / Wa_18018781329 */ - wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_22016670082 */ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_14014830051 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); @@ -1743,8 +1672,6 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) * GT, the media GT's versions are regular singleton registers. */ wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB); debug_dump_steering(gt); } @@ -1762,10 +1689,8 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_METEORLAKE(gt->i915)) { - if (gt->type != GT_MEDIA) - wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); - + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { + wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); } @@ -1789,15 +1714,15 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) gt_tuning_settings(gt, wal); if (gt->type == GT_MEDIA) { - if (MEDIA_VER(i915) >= 13) + if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) xelpmp_gt_workarounds_init(gt, wal); else - MISSING_CASE(MEDIA_VER(i915)); + MISSING_CASE(MEDIA_VER_FULL(i915)); return; } - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) xelpg_gt_workarounds_init(gt, wal); else if (IS_PONTEVECCHIO(i915)) pvc_gt_workarounds_init(gt, wal); @@ -1850,22 +1775,6 @@ void intel_gt_init_workarounds(struct intel_gt *gt) wa_init_finish(wal); } -static enum forcewake_domains -wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) -{ - enum forcewake_domains fw = 0; - struct i915_wa *wa; - unsigned int i; - - for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - fw |= intel_uncore_forcewake_for_reg(uncore, - wa->reg, - FW_REG_READ | - FW_REG_WRITE); - - return fw; -} - static bool wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur, const char *name, const char *from) @@ -2229,29 +2138,10 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) switch (engine->class) { case RENDER_CLASS: - /* - * Wa_1507100340:dg2_g10 - * - * This covers 4 registers which are next to one another : - * - PS_INVOCATION_COUNT - * - PS_INVOCATION_COUNT_UDW - * - PS_DEPTH_COUNT - * - PS_DEPTH_COUNT_UDW - */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) - whitelist_reg_ext(w, PS_INVOCATION_COUNT, - RING_FORCE_TO_NONPRIV_ACCESS_RD | - RING_FORCE_TO_NONPRIV_RANGE_4); - /* Required by recommended tuning setting (not a workaround) */ whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3); break; - case COMPUTE_CLASS: - /* Wa_16011157294:dg2_g10 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) - whitelist_reg(w, GEN9_CTX_PREEMPT_REG); - break; default: break; } @@ -2281,7 +2171,7 @@ static void pvc_whitelist_build(struct intel_engine_cs *engine) blacklist_trtt(engine); } -static void mtl_whitelist_build(struct intel_engine_cs *engine) +static void xelpg_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -2303,8 +2193,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) wa_init_start(w, engine->gt, "whitelist", engine->name); - if (IS_METEORLAKE(i915)) - mtl_whitelist_build(engine); + if (engine->gt->type == GT_MEDIA) + ; /* none yet */ + else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + xelpg_whitelist_build(engine); else if (IS_PONTEVECCHIO(i915)) pvc_whitelist_build(engine); else if (IS_DG2(i915)) @@ -2402,62 +2294,35 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } } -static bool needs_wa_1308578152(struct intel_engine_cs *engine) -{ - return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >= - GEN_DSS_PER_GSLICE; -} - static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_22014600077 */ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, ENABLE_EU_COUNT_FOR_TDL_FLUSH); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || + IS_DG2(i915)) { /* Wa_1509727124 */ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915) || - IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_DG2(i915)) { /* Wa_22012856258 */ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_READ_SUPPRESSION); } - if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14013392000:dg2_g11 */ - wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14012419201:dg2 */ - wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, - GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); - } - - /* Wa_1308578152:dg2_g10 when first gslice is fused off */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) && - needs_wa_1308578152(engine)) { - wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON, - GEN12_REPLAY_MODE_GRANULARITY); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + if (IS_DG2(i915)) { /* * Wa_22010960976:dg2 * Wa_14013347512:dg2 @@ -2466,34 +2331,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - /* - * Wa_1608949956:dg2_g10 - * Wa_14010198302:dg2_g10 - */ - wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || + IS_DG2(i915)) { + /* Wa_14015150844 */ + wa_mcr_add(wal, XEHP_HDC_CHICKEN0, 0, + _MASKED_BIT_ENABLE(DIS_ATOMIC_CHAINING_TYPED_WRITES), + 0, true); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) - /* Wa_22010430635:dg2 */ - wa_mcr_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); - - /* Wa_14013202645:dg2 */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) - wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY); - - /* Wa_22012532006:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) - wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); - - if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || - IS_DG2_G10(i915)) { + if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { /* Wa_22014600077:dg2 */ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), @@ -2501,6 +2347,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) true); } + if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || + IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { + /* + * Wa_1606700617:tgl,dg1,adl-p + * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p + * Wa_14010826681:tgl,dg1,rkl,adl-p + * Wa_18019627453:dg2 + */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + } + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ @@ -2514,19 +2373,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); - } - if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) || - IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* - * Wa_1606700617:tgl,dg1,adl-p - * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p - * Wa_14010826681:tgl,dg1,rkl,adl-p - * Wa_18019627453:dg2 - */ - wa_masked_en(wal, - GEN9_CS_DEBUG_MODE1, - FF_DOP_CLOCK_GATE_DISABLE); + /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ + wa_mcr_masked_en(wal, + GEN10_SAMPLER_MODE, + ENABLE_SMALLPL); } if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || @@ -2553,14 +2404,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || - IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { - /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ - wa_mcr_masked_en(wal, - GEN10_SAMPLER_MODE, - ENABLE_SMALLPL); - } - if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, @@ -2933,7 +2776,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; /* WaKBLVECSSemaphoreWaitPoll:kbl */ - if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) { + if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) { wa_write(wal, RING_SEMA_WAIT_POLL(engine->mmio_base), 1); @@ -2962,10 +2805,12 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * function invoked by __intel_engine_init_ctx_wa(). */ static void -add_render_compute_tuning_settings(struct drm_i915_private *i915, +add_render_compute_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_METEORLAKE(i915) || IS_DG2(i915)) + struct drm_i915_private *i915 = gt->i915; + + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* @@ -2994,8 +2839,9 @@ static void general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - add_render_compute_tuning_settings(i915, wal); + add_render_compute_tuning_settings(gt, wal); if (GRAPHICS_VER(i915) >= 11) { /* This is not a Wa (although referred to as @@ -3016,13 +2862,13 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) /* Wa_14017856879 */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* * Wa_14017066071 * Wa_14017654203 @@ -3030,37 +2876,47 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, MTL_DISABLE_SAMPLER_SC_OOO); - if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* Wa_22015279794 */ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_PREFETCH_INTO_IC); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || + IS_DG2(i915)) { /* Wa_22013037850 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DISABLE_128B_EVICTION_COMMAND_UDW); + + /* Wa_18017747507 */ + wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { /* Wa_22014226127 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2(i915)) { - /* Wa_18017747507 */ - wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); + if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { + /* Wa_14015227452:dg2,pvc */ + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + + /* Wa_16015675438:dg2,pvc */ + wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(i915)) { + if (IS_DG2(i915)) { + /* + * Wa_16011620976:dg2_g11 + * Wa_22015475538:dg2 + */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + } + + if (IS_DG2_G11(i915)) { /* * Wa_22012826095:dg2 * Wa_22013059131:dg2 @@ -3072,18 +2928,23 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_22013059131:dg2 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT); - } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { /* - * Wa_14010918519:dg2_g10 + * Wa_22012654132 * - * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, - * so ignoring verification. + * Note that register 0xE420 is write-only and cannot be read + * back for verification on DG2 (due to Wa_14012342262), so + * we need to explicitly skip the readback. */ - wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, - FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, - 0, false); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); + } + + if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) { + /* Wa_18028616096 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3); } if (IS_XEHPSDV(i915)) { @@ -3101,35 +2962,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); } - - if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { - /* Wa_14015227452:dg2,pvc */ - wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); - - /* Wa_16015675438:dg2,pvc */ - wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); - } - - if (IS_DG2(i915)) { - /* - * Wa_16011620976:dg2_g11 - * Wa_22015475538:dg2 - */ - wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915)) - /* - * Wa_22012654132 - * - * Note that register 0xE420 is write-only and cannot be read - * back for verification on DG2 (due to Wa_14012342262), so - * we need to explicitly skip the readback. - */ - wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); } static void @@ -3237,7 +3069,7 @@ wa_list_srm(struct i915_request *rq, const struct i915_wa_list *wal, struct i915_vma *vma) { - struct drm_i915_private *i915 = rq->engine->i915; + struct drm_i915_private *i915 = rq->i915; unsigned int i, count = 0; const struct i915_wa *wa; u32 srm, *cs; @@ -3336,7 +3168,7 @@ retry: err = 0; for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { - if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg))) + if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg))) continue; if (!wa_verify(wal->gt, wa, results[i], wal->name, from)) diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 76fbae358072..47070cba7eb1 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -88,8 +88,9 @@ static int __live_context_size(struct intel_engine_cs *engine) goto err; vaddr = i915_gem_object_pin_map_unlocked(ce->state->obj, - i915_coherent_map_type(engine->i915, - ce->state->obj, false)); + intel_gt_coherent_map_type(engine->gt, + ce->state->obj, + false)); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 78cdfc6f315f..86cecf7a1105 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -62,7 +62,7 @@ static int write_timestamp(struct i915_request *rq, int slot) return PTR_ERR(cs); cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; - if (GRAPHICS_VER(rq->engine->i915) >= 8) + if (GRAPHICS_VER(rq->i915) >= 8) cmd++; *cs++ = cmd; *cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine)); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 8b0d84f2aad2..0dd4d00ee894 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -73,7 +73,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt) h->seqno = memset(vaddr, 0xff, PAGE_SIZE); vaddr = i915_gem_object_pin_map_unlocked(h->obj, - i915_coherent_map_type(gt->i915, h->obj, false)); + intel_gt_coherent_map_type(gt, h->obj, false)); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_unpin_hws; @@ -119,7 +119,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) return ERR_CAST(obj); } - vaddr = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(gt->i915, obj, false)); + vaddr = i915_gem_object_pin_map_unlocked(obj, intel_gt_coherent_map_type(gt, obj, false)); if (IS_ERR(vaddr)) { i915_gem_object_put(obj); i915_vm_put(vm); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index a78a3d2c2e16..5f826b6dcf5d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1292,9 +1292,9 @@ static int compare_isolation(struct intel_engine_cs *engine, } lrc = i915_gem_object_pin_map_unlocked(ce->state->obj, - i915_coherent_map_type(engine->i915, - ce->state->obj, - false)); + intel_gt_coherent_map_type(engine->gt, + ce->state->obj, + false)); if (IS_ERR(lrc)) { err = PTR_ERR(lrc); goto err_B1; diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 3def5ca72dec..1a34cbe04fb6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -710,7 +710,7 @@ static int threaded_migrate(struct intel_migrate *migrate, thread[i].tsk = tsk; } - msleep(10); /* start all threads before we kthread_stop() */ + msleep(10 * n_cpus); /* start all threads before we kthread_stop() */ for (i = 0; i < n_cpus; ++i) { struct task_struct *tsk = thread[i].tsk; diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index a8446ab82501..d73e438fb85f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -137,7 +137,7 @@ static int read_mocs_table(struct i915_request *rq, if (!table) return 0; - if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915)) + if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915)) addr = global_mocs_offset() + gt->uncore->gsi_offset; else addr = mocs_offset(rq->engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 2ceeadecc639..a7189c2d660c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -140,7 +140,7 @@ static const u32 *__live_rc6_ctx(struct intel_context *ce) } cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; - if (GRAPHICS_VER(rq->engine->i915) >= 8) + if (GRAPHICS_VER(rq->i915) >= 8) cmd++; *cs++ = cmd; diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 39c3ec12df1a..fa36cf920bde 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -459,12 +459,12 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) if (IS_ERR(cs)) return PTR_ERR(cs); - if (GRAPHICS_VER(rq->engine->i915) >= 8) { + if (GRAPHICS_VER(rq->i915) >= 8) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = addr; *cs++ = 0; *cs++ = value; - } else if (GRAPHICS_VER(rq->engine->i915) >= 4) { + } else if (GRAPHICS_VER(rq->i915) >= 4) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; *cs++ = addr; diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c index 3bd6b540257b..7e41f69fc818 100644 --- a/drivers/gpu/drm/i915/gt/selftest_tlb.c +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c @@ -6,6 +6,7 @@ #include "i915_selftest.h" #include "gem/i915_gem_internal.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gen8_engine_cs.h" @@ -354,7 +355,7 @@ out_a: static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length) { - intel_gt_invalidate_tlb(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1); + intel_gt_invalidate_tlb_full(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1); } static int invalidate_full(void *arg) diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index 449c9ed44382..bccc3a1200bc 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -33,7 +33,6 @@ struct file *shmem_create_from_data(const char *name, void *data, size_t len) struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); enum i915_map_type map_type; struct file *file; void *ptr; @@ -44,7 +43,7 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) return file; } - map_type = i915_coherent_map_type(i915, obj, true); + map_type = i915_gem_object_is_lmem(obj) ? I915_MAP_WC : I915_MAP_WB; ptr = i915_gem_object_pin_map_unlocked(obj, map_type); if (IS_ERR(ptr)) return ERR_CAST(ptr); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h index 714f0c256118..6d009a905269 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h @@ -8,6 +8,74 @@ #include <linux/types.h> +struct intel_gsc_version { + u16 major; + u16 minor; + u16 hotfix; + u16 build; +} __packed; + +struct intel_gsc_partition { + u32 offset; + u32 size; +} __packed; + +struct intel_gsc_layout_pointers { + u8 rom_bypass_vector[16]; + + /* size of pointers layout not including ROM bypass vector */ + u16 size; + + /* + * bit0: Backup copy of layout pointers exist + * bits1-15: reserved + */ + u8 flags; + + u8 reserved; + + u32 crc32; + + struct intel_gsc_partition datap; + struct intel_gsc_partition boot1; + struct intel_gsc_partition boot2; + struct intel_gsc_partition boot3; + struct intel_gsc_partition boot4; + struct intel_gsc_partition boot5; + struct intel_gsc_partition temp_pages; +} __packed; + +/* Boot partition structures */ +struct intel_gsc_bpdt_header { + u32 signature; +#define INTEL_GSC_BPDT_HEADER_SIGNATURE 0x000055AA + + u16 descriptor_count; /* num of entries after the header */ + + u8 version; + u8 configuration; + + u32 crc32; + + u32 build_version; + struct intel_gsc_version tool_version; +} __packed; + +struct intel_gsc_bpdt_entry { + /* + * Bits 0-15: BPDT entry type + * Bits 16-17: reserved + * Bit 18: code sub-partition + * Bits 19-31: reserved + */ + u32 type; +#define INTEL_GSC_BPDT_ENTRY_TYPE_MASK GENMASK(15, 0) +#define INTEL_GSC_BPDT_ENTRY_TYPE_GSC_RBE 0x1 + + u32 sub_partition_offset; /* from the base of the BPDT header */ + u32 sub_partition_size; +} __packed; + /* Code partition directory (CPD) structures */ struct intel_gsc_cpd_header_v2 { u32 header_marker; @@ -44,13 +112,6 @@ struct intel_gsc_cpd_entry { u8 reserved[4]; } __packed; -struct intel_gsc_version { - u16 major; - u16 minor; - u16 hotfix; - u16 build; -} __packed; - struct intel_gsc_manifest_header { u32 header_type; /* 0x4 for manifest type */ u32 header_length; /* in dwords */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 60e9c6c9e775..e2e42b3e0d5d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -3,48 +3,216 @@ * Copyright © 2022 Intel Corporation */ +#include "gem/i915_gem_lmem.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_print.h" #include "gt/intel_ring.h" +#include "intel_gsc_binary_headers.h" #include "intel_gsc_fw.h" - -#define GSC_FW_STATUS_REG _MMIO(0x116C40) -#define GSC_FW_CURRENT_STATE REG_GENMASK(3, 0) -#define GSC_FW_CURRENT_STATE_RESET 0 -#define GSC_FW_PROXY_STATE_NORMAL 5 -#define GSC_FW_INIT_COMPLETE_BIT REG_BIT(9) +#include "intel_gsc_uc_heci_cmd_submit.h" +#include "i915_reg.h" static bool gsc_is_in_reset(struct intel_uncore *uncore) { - u32 fw_status = intel_uncore_read(uncore, GSC_FW_STATUS_REG); + u32 fw_status = intel_uncore_read(uncore, HECI_FWSTS(MTL_GSC_HECI1_BASE, 1)); - return REG_FIELD_GET(GSC_FW_CURRENT_STATE, fw_status) == - GSC_FW_CURRENT_STATE_RESET; + return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, fw_status) == + HECI1_FWSTS1_CURRENT_STATE_RESET; } -static u32 gsc_uc_get_fw_status(struct intel_uncore *uncore) +static u32 gsc_uc_get_fw_status(struct intel_uncore *uncore, bool needs_wakeref) { intel_wakeref_t wakeref; u32 fw_status = 0; - with_intel_runtime_pm(uncore->rpm, wakeref) - fw_status = intel_uncore_read(uncore, GSC_FW_STATUS_REG); + if (needs_wakeref) + wakeref = intel_runtime_pm_get(uncore->rpm); + + fw_status = intel_uncore_read(uncore, HECI_FWSTS(MTL_GSC_HECI1_BASE, 1)); + if (needs_wakeref) + intel_runtime_pm_put(uncore->rpm, wakeref); return fw_status; } -bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc) +bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc, bool needs_wakeref) +{ + return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, + gsc_uc_get_fw_status(gsc_uc_to_gt(gsc)->uncore, + needs_wakeref)) == + HECI1_FWSTS1_PROXY_STATE_NORMAL; +} + +int intel_gsc_uc_fw_proxy_get_status(struct intel_gsc_uc *gsc) { - return REG_FIELD_GET(GSC_FW_CURRENT_STATE, - gsc_uc_get_fw_status(gsc_uc_to_gt(gsc)->uncore)) == - GSC_FW_PROXY_STATE_NORMAL; + if (!(IS_ENABLED(CONFIG_INTEL_MEI_GSC_PROXY))) + return -ENODEV; + if (!intel_uc_fw_is_loadable(&gsc->fw)) + return -ENODEV; + if (__intel_uc_fw_status(&gsc->fw) == INTEL_UC_FIRMWARE_LOAD_FAIL) + return -ENOLINK; + if (!intel_gsc_uc_fw_proxy_init_done(gsc, true)) + return -EAGAIN; + + return 0; } bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc) { - return gsc_uc_get_fw_status(gsc_uc_to_gt(gsc)->uncore) & GSC_FW_INIT_COMPLETE_BIT; + return gsc_uc_get_fw_status(gsc_uc_to_gt(gsc)->uncore, false) & + HECI1_FWSTS1_INIT_COMPLETE; +} + +static inline u32 cpd_entry_offset(const struct intel_gsc_cpd_entry *entry) +{ + return entry->offset & INTEL_GSC_CPD_ENTRY_OFFSET_MASK; +} + +int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, size_t size) +{ + struct intel_gsc_uc *gsc = container_of(gsc_fw, struct intel_gsc_uc, fw); + struct intel_gt *gt = gsc_uc_to_gt(gsc); + const struct intel_gsc_layout_pointers *layout = data; + const struct intel_gsc_bpdt_header *bpdt_header = NULL; + const struct intel_gsc_bpdt_entry *bpdt_entry = NULL; + const struct intel_gsc_cpd_header_v2 *cpd_header = NULL; + const struct intel_gsc_cpd_entry *cpd_entry = NULL; + const struct intel_gsc_manifest_header *manifest; + size_t min_size = sizeof(*layout); + int i; + + if (size < min_size) { + gt_err(gt, "GSC FW too small! %zu < %zu\n", size, min_size); + return -ENODATA; + } + + /* + * The GSC binary starts with the pointer layout, which contains the + * locations of the various partitions of the binary. The one we're + * interested in to get the version is the boot1 partition, where we can + * find a BPDT header followed by entries, one of which points to the + * RBE sub-section of the partition. From here, we can parse the CPD + * header and the following entries to find the manifest location + * (entry identified by the "RBEP.man" name), from which we can finally + * extract the version. + * + * -------------------------------------------------- + * [ intel_gsc_layout_pointers ] + * [ ... ] + * [ boot1.offset >---------------------------]------o + * [ ... ] | + * -------------------------------------------------- | + * | + * -------------------------------------------------- | + * [ intel_gsc_bpdt_header ]<-----o + * -------------------------------------------------- + * [ intel_gsc_bpdt_entry[] ] + * [ entry1 ] + * [ ... ] + * [ entryX ] + * [ type == GSC_RBE ] + * [ offset >-----------------------------]------o + * [ ... ] | + * -------------------------------------------------- | + * | + * -------------------------------------------------- | + * [ intel_gsc_cpd_header_v2 ]<-----o + * -------------------------------------------------- + * [ intel_gsc_cpd_entry[] ] + * [ entry1 ] + * [ ... ] + * [ entryX ] + * [ "RBEP.man" ] + * [ ... ] + * [ offset >----------------------------]------o + * [ ... ] | + * -------------------------------------------------- | + * | + * -------------------------------------------------- | + * [ intel_gsc_manifest_header ]<-----o + * [ ... ] + * [ intel_gsc_version fw_version ] + * [ ... ] + * -------------------------------------------------- + */ + + min_size = layout->boot1.offset + layout->boot1.size; + if (size < min_size) { + gt_err(gt, "GSC FW too small for boot section! %zu < %zu\n", + size, min_size); + return -ENODATA; + } + + min_size = sizeof(*bpdt_header); + if (layout->boot1.size < min_size) { + gt_err(gt, "GSC FW boot section too small for BPDT header: %u < %zu\n", + layout->boot1.size, min_size); + return -ENODATA; + } + + bpdt_header = data + layout->boot1.offset; + if (bpdt_header->signature != INTEL_GSC_BPDT_HEADER_SIGNATURE) { + gt_err(gt, "invalid signature for BPDT header: 0x%08x!\n", + bpdt_header->signature); + return -EINVAL; + } + + min_size += sizeof(*bpdt_entry) * bpdt_header->descriptor_count; + if (layout->boot1.size < min_size) { + gt_err(gt, "GSC FW boot section too small for BPDT entries: %u < %zu\n", + layout->boot1.size, min_size); + return -ENODATA; + } + + bpdt_entry = (void *)bpdt_header + sizeof(*bpdt_header); + for (i = 0; i < bpdt_header->descriptor_count; i++, bpdt_entry++) { + if ((bpdt_entry->type & INTEL_GSC_BPDT_ENTRY_TYPE_MASK) != + INTEL_GSC_BPDT_ENTRY_TYPE_GSC_RBE) + continue; + + cpd_header = (void *)bpdt_header + bpdt_entry->sub_partition_offset; + min_size = bpdt_entry->sub_partition_offset + sizeof(*cpd_header); + break; + } + + if (!cpd_header) { + gt_err(gt, "couldn't find CPD header in GSC binary!\n"); + return -ENODATA; + } + + if (layout->boot1.size < min_size) { + gt_err(gt, "GSC FW boot section too small for CPD header: %u < %zu\n", + layout->boot1.size, min_size); + return -ENODATA; + } + + if (cpd_header->header_marker != INTEL_GSC_CPD_HEADER_MARKER) { + gt_err(gt, "invalid marker for CPD header in GSC bin: 0x%08x!\n", + cpd_header->header_marker); + return -EINVAL; + } + + min_size += sizeof(*cpd_entry) * cpd_header->num_of_entries; + if (layout->boot1.size < min_size) { + gt_err(gt, "GSC FW boot section too small for CPD entries: %u < %zu\n", + layout->boot1.size, min_size); + return -ENODATA; + } + + cpd_entry = (void *)cpd_header + cpd_header->header_length; + for (i = 0; i < cpd_header->num_of_entries; i++, cpd_entry++) { + if (strcmp(cpd_entry->name, "RBEP.man") == 0) { + manifest = (void *)cpd_header + cpd_entry_offset(cpd_entry); + intel_uc_fw_version_from_gsc_manifest(&gsc->release, + manifest); + gsc->security_version = manifest->security_version; + break; + } + } + + return 0; } static int emit_gsc_fw_load(struct i915_request *rq, struct intel_gsc_uc *gsc) @@ -114,48 +282,25 @@ out_rq: static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc) { struct intel_gt *gt = gsc_uc_to_gt(gsc); - struct drm_i915_private *i915 = gt->i915; - struct drm_i915_gem_object *obj; - void *src, *dst; + void *src; if (!gsc->local) return -ENODEV; - obj = gsc->local->obj; - - if (obj->base.size < gsc->fw.size) + if (gsc->local->size < gsc->fw.size) return -ENOSPC; - /* - * Wa_22016122933: For MTL the shared memory needs to be mapped - * as WC on CPU side and UC (PAT index 2) on GPU side - */ - if (IS_METEORLAKE(i915)) - i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); - - dst = i915_gem_object_pin_map_unlocked(obj, - i915_coherent_map_type(i915, obj, true)); - if (IS_ERR(dst)) - return PTR_ERR(dst); - src = i915_gem_object_pin_map_unlocked(gsc->fw.obj, - i915_coherent_map_type(i915, gsc->fw.obj, true)); - if (IS_ERR(src)) { - i915_gem_object_unpin_map(obj); + intel_gt_coherent_map_type(gt, gsc->fw.obj, true)); + if (IS_ERR(src)) return PTR_ERR(src); - } - memset(dst, 0, obj->base.size); - memcpy(dst, src, gsc->fw.size); + memcpy_toio(gsc->local_vaddr, src, gsc->fw.size); + memset_io(gsc->local_vaddr + gsc->fw.size, 0, gsc->local->size - gsc->fw.size); - /* - * Wa_22016122933: Making sure the data in dst is - * visible to GSC right away - */ intel_guc_write_barrier(>->uc.guc); i915_gem_object_unpin_map(gsc->fw.obj); - i915_gem_object_unpin_map(obj); return 0; } @@ -163,12 +308,94 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc) static int gsc_fw_wait(struct intel_gt *gt) { return intel_wait_for_register(gt->uncore, - GSC_FW_STATUS_REG, - GSC_FW_INIT_COMPLETE_BIT, - GSC_FW_INIT_COMPLETE_BIT, + HECI_FWSTS(MTL_GSC_HECI1_BASE, 1), + HECI1_FWSTS1_INIT_COMPLETE, + HECI1_FWSTS1_INIT_COMPLETE, 500); } +struct intel_gsc_mkhi_header { + u8 group_id; +#define MKHI_GROUP_ID_GFX_SRV 0x30 + + u8 command; +#define MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION (0x42) + + u8 reserved; + u8 result; +} __packed; + +struct mtl_gsc_ver_msg_in { + struct intel_gsc_mtl_header header; + struct intel_gsc_mkhi_header mkhi; +} __packed; + +struct mtl_gsc_ver_msg_out { + struct intel_gsc_mtl_header header; + struct intel_gsc_mkhi_header mkhi; + u16 proj_major; + u16 compat_major; + u16 compat_minor; + u16 reserved[5]; +} __packed; + +#define GSC_VER_PKT_SZ SZ_4K + +static int gsc_fw_query_compatibility_version(struct intel_gsc_uc *gsc) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + struct mtl_gsc_ver_msg_in *msg_in; + struct mtl_gsc_ver_msg_out *msg_out; + struct i915_vma *vma; + u64 offset; + void *vaddr; + int err; + + err = intel_guc_allocate_and_map_vma(>->uc.guc, GSC_VER_PKT_SZ * 2, + &vma, &vaddr); + if (err) { + gt_err(gt, "failed to allocate vma for GSC version query\n"); + return err; + } + + offset = i915_ggtt_offset(vma); + msg_in = vaddr; + msg_out = vaddr + GSC_VER_PKT_SZ; + + intel_gsc_uc_heci_cmd_emit_mtl_header(&msg_in->header, + HECI_MEADDRESS_MKHI, + sizeof(*msg_in), 0); + msg_in->mkhi.group_id = MKHI_GROUP_ID_GFX_SRV; + msg_in->mkhi.command = MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION; + + err = intel_gsc_uc_heci_cmd_submit_packet(>->uc.gsc, + offset, + sizeof(*msg_in), + offset + GSC_VER_PKT_SZ, + GSC_VER_PKT_SZ); + if (err) { + gt_err(gt, + "failed to submit GSC request for compatibility version: %d\n", + err); + goto out_vma; + } + + if (msg_out->header.message_size != sizeof(*msg_out)) { + gt_err(gt, "invalid GSC reply length %u [expected %zu], s=0x%x, f=0x%x, r=0x%x\n", + msg_out->header.message_size, sizeof(*msg_out), + msg_out->header.status, msg_out->header.flags, msg_out->mkhi.result); + err = -EPROTO; + goto out_vma; + } + + gsc->fw.file_selected.ver.major = msg_out->compat_major; + gsc->fw.file_selected.ver.minor = msg_out->compat_minor; + +out_vma: + i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP); + return err; +} + int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc) { struct intel_gt *gt = gsc_uc_to_gt(gsc); @@ -226,10 +453,24 @@ int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc) if (err) goto fail; + err = gsc_fw_query_compatibility_version(gsc); + if (err) + goto fail; + + /* we only support compatibility version 1.0 at the moment */ + err = intel_uc_check_file_version(gsc_fw, NULL); + if (err) + goto fail; + /* FW is not fully operational until we enable SW proxy */ intel_uc_fw_change_status(gsc_fw, INTEL_UC_FIRMWARE_TRANSFERRED); - gt_info(gt, "Loaded GSC firmware %s\n", gsc_fw->file_selected.path); + gt_info(gt, "Loaded GSC firmware %s (cv%u.%u, r%u.%u.%u.%u, svn %u)\n", + gsc_fw->file_selected.path, + gsc_fw->file_selected.ver.major, gsc_fw->file_selected.ver.minor, + gsc->release.major, gsc->release.minor, + gsc->release.patch, gsc->release.build, + gsc->security_version); return 0; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h index fff8928218df..bc9dd0de8aaf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h @@ -9,10 +9,13 @@ #include <linux/types.h> struct intel_gsc_uc; +struct intel_uc_fw; struct intel_uncore; +int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, size_t size); int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc); bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc); -bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc); +bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc, bool needs_wakeref); +int intel_gsc_uc_fw_proxy_get_status(struct intel_gsc_uc *gsc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c index c659cc01f32f..0d3b22a74365 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c @@ -7,10 +7,11 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_print.h" -#include "intel_gsc_uc.h" #include "intel_gsc_fw.h" -#include "i915_drv.h" #include "intel_gsc_proxy.h" +#include "intel_gsc_uc.h" +#include "i915_drv.h" +#include "i915_reg.h" static void gsc_work(struct work_struct *work) { @@ -61,8 +62,18 @@ static void gsc_work(struct work_struct *work) } ret = intel_gsc_proxy_request_handler(gsc); - if (ret) + if (ret) { + if (actions & GSC_ACTION_FW_LOAD) { + /* + * A proxy failure right after firmware load means the proxy-init + * step has failed so mark GSC as not usable after this + */ + drm_err(>->i915->drm, + "GSC proxy handler failed to init\n"); + intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); + } goto out_put; + } /* mark the GSC FW init as done the first time we run this */ if (actions & GSC_ACTION_FW_LOAD) { @@ -71,12 +82,13 @@ static void gsc_work(struct work_struct *work) * complete the request handling cleanly, so we need to check the * status register to check if the proxy init was actually successful */ - if (intel_gsc_uc_fw_proxy_init_done(gsc)) { + if (intel_gsc_uc_fw_proxy_init_done(gsc, false)) { drm_dbg(>->i915->drm, "GSC Proxy initialized\n"); intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_RUNNING); } else { drm_err(>->i915->drm, "GSC status reports proxy init not complete\n"); + intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); } } } @@ -98,7 +110,7 @@ static bool gsc_engine_supported(struct intel_gt *gt) GEM_BUG_ON(!gt_is_root(gt) && !gt->info.engine_mask); if (gt_is_root(gt)) - mask = RUNTIME_INFO(gt->i915)->platform_engine_mask; + mask = INTEL_INFO(gt->i915)->platform_engine_mask; else mask = gt->info.engine_mask; @@ -133,26 +145,85 @@ void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc) } } +static int gsc_allocate_and_map_vma(struct intel_gsc_uc *gsc, u32 size) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + void __iomem *vaddr; + int ret = 0; + + /* + * The GSC FW doesn't immediately suspend after becoming idle, so there + * is a chance that it could still be awake after we successfully + * return from the pci suspend function, even if there are no pending + * operations. + * The FW might therefore try to access memory for its suspend operation + * after the kernel has completed the HW suspend flow; this can cause + * issues if the FW is mapped in normal RAM memory, as some of the + * involved HW units might've already lost power. + * The driver must therefore avoid this situation and the recommended + * way to do so is to use stolen memory for the GSC memory allocation, + * because stolen memory takes a different path in HW and it is + * guaranteed to always work as long as the GPU itself is awake (which + * it must be if the GSC is awake). + */ + obj = i915_gem_object_create_stolen(gt->i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; + } + + vaddr = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto err; + } + + i915_vma_make_unshrinkable(vma); + + gsc->local = vma; + gsc->local_vaddr = vaddr; + + return 0; + +err: + i915_gem_object_put(obj); + return ret; +} + +static void gsc_unmap_and_free_vma(struct intel_gsc_uc *gsc) +{ + struct i915_vma *vma = fetch_and_zero(&gsc->local); + + if (!vma) + return; + + gsc->local_vaddr = NULL; + i915_vma_unpin_iomap(vma); + i915_gem_object_put(vma->obj); +} + int intel_gsc_uc_init(struct intel_gsc_uc *gsc) { static struct lock_class_key gsc_lock; struct intel_gt *gt = gsc_uc_to_gt(gsc); struct intel_engine_cs *engine = gt->engine[GSC0]; struct intel_context *ce; - struct i915_vma *vma; int err; err = intel_uc_fw_init(&gsc->fw); if (err) goto out; - vma = intel_guc_allocate_vma(>->uc.guc, SZ_8M); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); + err = gsc_allocate_and_map_vma(gsc, SZ_4M); + if (err) goto out_fw; - } - - gsc->local = vma; ce = intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K, I915_GEM_HWS_GSC_ADDR, @@ -173,7 +244,7 @@ int intel_gsc_uc_init(struct intel_gsc_uc *gsc) return 0; out_vma: - i915_vma_unpin_and_release(&gsc->local, 0); + gsc_unmap_and_free_vma(gsc); out_fw: intel_uc_fw_fini(&gsc->fw); out: @@ -197,7 +268,7 @@ void intel_gsc_uc_fini(struct intel_gsc_uc *gsc) if (gsc->ce) intel_engine_destroy_pinned_context(fetch_and_zero(&gsc->ce)); - i915_vma_unpin_and_release(&gsc->local, 0); + gsc_unmap_and_free_vma(gsc); intel_uc_fw_fini(&gsc->fw); } @@ -245,3 +316,45 @@ void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc) queue_work(gsc->wq, &gsc->work); } + +void intel_gsc_uc_load_status(struct intel_gsc_uc *gsc, struct drm_printer *p) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + struct intel_uncore *uncore = gt->uncore; + intel_wakeref_t wakeref; + + if (!intel_gsc_uc_is_supported(gsc)) { + drm_printf(p, "GSC not supported\n"); + return; + } + + if (!intel_gsc_uc_is_wanted(gsc)) { + drm_printf(p, "GSC disabled\n"); + return; + } + + drm_printf(p, "GSC firmware: %s\n", gsc->fw.file_selected.path); + if (gsc->fw.file_selected.path != gsc->fw.file_wanted.path) + drm_printf(p, "GSC firmware wanted: %s\n", gsc->fw.file_wanted.path); + drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(gsc->fw.status)); + + drm_printf(p, "Release: %u.%u.%u.%u\n", + gsc->release.major, gsc->release.minor, + gsc->release.patch, gsc->release.build); + + drm_printf(p, "Compatibility Version: %u.%u [min expected %u.%u]\n", + gsc->fw.file_selected.ver.major, gsc->fw.file_selected.ver.minor, + gsc->fw.file_wanted.ver.major, gsc->fw.file_wanted.ver.minor); + + drm_printf(p, "SVN: %u\n", gsc->security_version); + + with_intel_runtime_pm(uncore->rpm, wakeref) { + u32 i; + + for (i = 1; i <= 6; i++) { + u32 status = intel_uncore_read(uncore, + HECI_FWSTS(MTL_GSC_HECI1_BASE, i)); + drm_printf(p, "HECI1 FWSTST%u = 0x%08x\n", i, status); + } + } +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h index a2a0813b8a76..c8082cf200fc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h @@ -8,6 +8,7 @@ #include "intel_uc_fw.h" +struct drm_printer; struct i915_vma; struct intel_context; struct i915_gsc_proxy_component; @@ -17,7 +18,26 @@ struct intel_gsc_uc { struct intel_uc_fw fw; /* GSC-specific additions */ + + /* + * The GSC has 3 version numbers: + * - Release version (incremented with each build) + * - Security version (incremented on security fix) + * - Compatibility version (incremented on interface change) + * + * The one we care about to use the binary is the last one, so that's + * the one we save inside the intel_uc_fw structure. The other two + * versions are only used for debug/info purposes, so we save them here. + * + * Note that the release and security versions are available in the + * binary header, while the compatibility version must be queried after + * loading the binary. + */ + struct intel_uc_fw_ver release; + u32 security_version; + struct i915_vma *local; /* private memory for GSC usage */ + void __iomem *local_vaddr; /* pointer to access the private memory */ struct intel_context *ce; /* for submission to GSC FW via GSC engine */ /* for delayed load and proxy handling */ @@ -44,6 +64,7 @@ void intel_gsc_uc_suspend(struct intel_gsc_uc *gsc); void intel_gsc_uc_resume(struct intel_gsc_uc *gsc); void intel_gsc_uc_flush_work(struct intel_gsc_uc *gsc); void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc); +void intel_gsc_uc_load_status(struct intel_gsc_uc *gsc, struct drm_printer *p); static inline bool intel_gsc_uc_is_supported(struct intel_gsc_uc *gsc) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c new file mode 100644 index 000000000000..5baacd822a1c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_print.h> + +#include "gt/intel_gt.h" +#include "gt/intel_gt_debugfs.h" +#include "gt/intel_gt_print.h" +#include "intel_gsc_uc.h" +#include "intel_gsc_uc_debugfs.h" +#include "i915_drv.h" + +static int gsc_info_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct intel_gsc_uc *gsc = m->private; + + if (!intel_gsc_uc_is_supported(gsc)) + return -ENODEV; + + intel_gsc_uc_load_status(gsc, &p); + + return 0; +} +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(gsc_info); + +void intel_gsc_uc_debugfs_register(struct intel_gsc_uc *gsc_uc, struct dentry *root) +{ + static const struct intel_gt_debugfs_file files[] = { + { "gsc_info", &gsc_info_fops, NULL }, + }; + + if (!intel_gsc_uc_is_supported(gsc_uc)) + return; + + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gsc_uc); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h new file mode 100644 index 000000000000..3415ad39aabb --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef DEBUGFS_GSC_UC_H +#define DEBUGFS_GSC_UC_H + +struct intel_gsc_uc; +struct dentry; + +void intel_gsc_uc_debugfs_register(struct intel_gsc_uc *gsc, struct dentry *root); + +#endif /* DEBUGFS_GSC_UC_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c index 89ed5ee9cded..2fde5c360cff 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c @@ -81,8 +81,17 @@ out_rq: i915_request_add(rq); - if (!err && i915_request_wait(rq, 0, msecs_to_jiffies(500)) < 0) - err = -ETIME; + if (!err) { + /* + * Start timeout for i915_request_wait only after considering one possible + * pending GSC-HECI submission cycle on the other (non-privileged) path. + */ + if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS)) + drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm, + "Delay in gsc-heci-priv submission to gsccs-hw"); + if (i915_request_wait(rq, 0, msecs_to_jiffies(GSC_HECI_REPLY_LATENCY_MS)) < 0) + err = -ETIME; + } i915_request_put(rq); @@ -186,6 +195,13 @@ out_rq: i915_request_add(rq); if (!err) { + /* + * Start timeout for i915_request_wait only after considering one possible + * pending GSC-HECI submission cycle on the other (privileged) path. + */ + if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS)) + drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm, + "Delay in gsc-heci-non-priv submission to gsccs-hw"); if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, msecs_to_jiffies(timeout_ms)) < 0) err = -ETIME; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h index ef70e304904a..c4308291c003 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h @@ -12,11 +12,18 @@ struct i915_vma; struct intel_context; struct intel_gsc_uc; +#define GSC_HECI_REPLY_LATENCY_MS 500 +/* + * Max FW response time is 500ms, but this should be counted from the time the + * command has hit the GSC-CS hardware, not the preceding handoff to GuC CTB. + */ + struct intel_gsc_mtl_header { u32 validity_marker; #define GSC_HECI_VALIDITY_MARKER 0xA578875A u8 heci_client_id; +#define HECI_MEADDRESS_MKHI 7 #define HECI_MEADDRESS_PROXY 10 #define HECI_MEADDRESS_PXP 17 #define HECI_MEADDRESS_HDCP 18 diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 2eb891b270ae..27df41c53b89 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -159,6 +159,21 @@ static void gen11_disable_guc_interrupts(struct intel_guc *guc) gen11_reset_guc_interrupts(guc); } +static void guc_dead_worker_func(struct work_struct *w) +{ + struct intel_guc *guc = container_of(w, struct intel_guc, dead_guc_worker); + struct intel_gt *gt = guc_to_gt(guc); + unsigned long last = guc->last_dead_guc_jiffies; + unsigned long delta = jiffies_to_msecs(jiffies - last); + + if (delta < 500) { + intel_gt_set_wedged(gt); + } else { + intel_gt_handle_error(gt, ALL_ENGINES, I915_ERROR_CAPTURE, "dead GuC"); + guc->last_dead_guc_jiffies = jiffies; + } +} + void intel_guc_init_early(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -171,6 +186,8 @@ void intel_guc_init_early(struct intel_guc *guc) intel_guc_slpc_init_early(&guc->slpc); intel_guc_rc_init_early(guc); + INIT_WORK(&guc->dead_guc_worker, guc_dead_worker_func); + mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); if (GRAPHICS_VER(i915) >= 11) { @@ -272,18 +289,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) flags |= GUC_WA_POLLCS; - /* Wa_16011759253:dg2_g10:a0 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) - flags |= GUC_WA_GAM_CREDITS; - /* Wa_14014475959 */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(gt->i915)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; /* - * Wa_14012197797:dg2_g10:a0,dg2_g11:a0 - * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12 + * Wa_14012197797 + * Wa_22011391025 * * The same WA bit is used for both and 22011391025 is applicable to * all DG2. @@ -292,22 +305,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) flags |= GUC_WA_DUAL_QUEUE; /* Wa_22011802037: graphics version 11/12 */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(gt->i915) >= 11 && - GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(gt)) flags |= GUC_WA_PRE_PARSER; - /* Wa_16011777198:dg2 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) - flags |= GUC_WA_RCS_RESET_BEFORE_RC6; - /* - * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..) - * Wa_22012727685:dg2_g11[a0..) + * Wa_22012727170 + * Wa_22012727685 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER)) + if (IS_DG2_G11(gt->i915)) flags |= GUC_WA_CONTEXT_ISOLATION; /* Wa_16015675438 */ @@ -461,6 +466,8 @@ void intel_guc_fini(struct intel_guc *guc) if (!intel_uc_fw_is_loadable(&guc->fw)) return; + flush_work(&guc->dead_guc_worker); + if (intel_guc_slpc_is_used(guc)) intel_guc_slpc_fini(&guc->slpc); @@ -585,6 +592,20 @@ out: return ret; } +int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action) +{ + if (action == INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED) + guc_err(guc, "Crash dump notification\n"); + else if (action == INTEL_GUC_ACTION_NOTIFY_EXCEPTION) + guc_err(guc, "Exception notification\n"); + else + guc_err(guc, "Unknown crash notification: 0x%04X\n", action); + + queue_work(system_unbound_wq, &guc->dead_guc_worker); + + return 0; +} + int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, const u32 *payload, u32 len) { @@ -601,6 +622,9 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, if (msg & INTEL_GUC_RECV_MSG_EXCEPTION) guc_err(guc, "Received early exception notification!\n"); + if (msg & (INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED | INTEL_GUC_RECV_MSG_EXCEPTION)) + queue_work(system_unbound_wq, &guc->dead_guc_worker); + return 0; } @@ -640,6 +664,8 @@ int intel_guc_suspend(struct intel_guc *guc) return 0; if (intel_guc_submission_is_used(guc)) { + flush_work(&guc->dead_guc_worker); + /* * This H2G MMIO command tears down the GuC in two steps. First it will * generate a G2H CTB for every active context indicating a reset. In @@ -745,10 +771,11 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) return ERR_CAST(obj); /* - * Wa_22016122933: For MTL the shared memory needs to be mapped - * as WC on CPU side and UC (PAT index 2) on GPU side + * Wa_22016122933: For Media version 13.0, all Media GT shared + * memory needs to be mapped as WC on CPU side and UC (PAT + * index 2) on GPU side. */ - if (IS_METEORLAKE(gt->i915)) + if (intel_gt_needs_wa_22016122933(gt)) i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); vma = i915_vma_instance(obj, >->ggtt->vm, NULL); @@ -792,8 +819,8 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, return PTR_ERR(vma); vaddr = i915_gem_object_pin_map_unlocked(vma->obj, - i915_coherent_map_type(guc_to_gt(guc)->i915, - vma->obj, true)); + intel_gt_coherent_map_type(guc_to_gt(guc), + vma->obj, true)); if (IS_ERR(vaddr)) { i915_vma_unpin_and_release(&vma, 0); return PTR_ERR(vaddr); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 8dc291ff0093..6c392bad29c1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -266,6 +266,20 @@ struct intel_guc { unsigned long last_stat_jiffies; } timestamp; + /** + * @dead_guc_worker: Asynchronous worker thread for forcing a GuC reset. + * Specifically used when the G2H handler wants to issue a reset. Resets + * require flushing the G2H queue. So, the G2H processing itself must not + * trigger a reset directly. Instead, go via this worker. + */ + struct work_struct dead_guc_worker; + /** + * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrance + * used to prevent a fundamentally broken system from continuously + * reloading the GuC. + */ + unsigned long last_dead_guc_jiffies; + #ifdef CONFIG_DRM_I915_SELFTEST /** * @number_guc_id_stolen: The number of guc_ids that have been stolen @@ -476,6 +490,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); int intel_guc_error_capture_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action); struct intel_engine_cs * intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index f28a3a83742d..6e22af31513a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -960,10 +960,6 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) /* now update descriptor */ WRITE_ONCE(desc->head, head); - /* - * Wa_22016122933: Making sure the head update is - * visible to GuC right away - */ intel_guc_write_barrier(ct_to_guc(ct)); return available - len; @@ -1116,12 +1112,8 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r ret = 0; break; case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED: - CT_ERROR(ct, "Received GuC crash dump notification!\n"); - ret = 0; - break; case INTEL_GUC_ACTION_NOTIFY_EXCEPTION: - CT_ERROR(ct, "Received GuC exception notification!\n"); - ret = 0; + ret = intel_guc_crash_process_msg(guc, action); break; default: ret = -EOPNOTSUPP; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 364d0d546ec8..0f79cb658518 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -251,9 +251,11 @@ static int guc_wait_ucode(struct intel_guc *guc) if (ret == 0) ret = -ENXIO; } else if (delta_ms > 200) { - guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d]\n", - delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), - before_freq, status, count, ret); + guc_warn(guc, "excessive init time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n", + delta_ms, status, count, ret); + guc_warn(guc, "excessive init time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n", + intel_rps_read_actual_frequency(&uncore->gt->rps), before_freq, + intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); } else { guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c index 852bea0208ce..cc9569af7f0c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c @@ -94,7 +94,7 @@ static int guc_hwconfig_fill_buffer(struct intel_guc *guc, struct intel_hwconfig static bool has_table(struct drm_i915_private *i915) { - if (IS_ALDERLAKE_P(i915) && !IS_ADLP_N(i915)) + if (IS_ALDERLAKE_P(i915) && !IS_ALDERLAKE_P_N(i915)) return true; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) return true; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index ee9f83af7cf6..2dfb07cc4b33 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -138,17 +138,6 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return ret > 0 ? -EPROTO : ret; } -static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) -{ - u32 request[] = { - GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), - id, - }; - - return intel_guc_send(guc, request, ARRAY_SIZE(request)); -} - static bool slpc_is_running(struct intel_guc_slpc *slpc) { return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; @@ -199,15 +188,6 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return ret; } -static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id) -{ - struct intel_guc *guc = slpc_to_guc(slpc); - - GEM_BUG_ON(id >= SLPC_MAX_PARAM); - - return guc_action_slpc_unset_param(guc, id); -} - static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) { struct intel_guc *guc = slpc_to_guc(slpc); @@ -470,12 +450,19 @@ int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val) ret = slpc_set_param(slpc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, val); - if (ret) + if (ret) { guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient freq(%d): %pe\n", val, ERR_PTR(ret)); - else + } else { slpc->ignore_eff_freq = val; + /* Set min to RPn when we disable efficient freq */ + if (val) + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + slpc->min_freq); + } + intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&slpc->lock); return ret; @@ -602,9 +589,8 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return ret; if (!slpc->min_freq_softlimit) { - ret = intel_guc_slpc_get_min_freq(slpc, &slpc->min_freq_softlimit); - if (unlikely(ret)) - return ret; + /* Min softlimit is initialized to RPn */ + slpc->min_freq_softlimit = slpc->min_freq; slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit; } else { return intel_guc_slpc_set_min_freq(slpc, @@ -666,49 +652,6 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc) slpc->boost_freq = slpc->rp0_freq; } -/** - * intel_guc_slpc_override_gucrc_mode() - override GUCRC mode - * @slpc: pointer to intel_guc_slpc. - * @mode: new value of the mode. - * - * This function will override the GUCRC mode. - * - * Return: 0 on success, non-zero error code on failure. - */ -int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode) -{ - int ret; - struct drm_i915_private *i915 = slpc_to_i915(slpc); - intel_wakeref_t wakeref; - - if (mode >= SLPC_GUCRC_MODE_MAX) - return -EINVAL; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode); - if (ret) - guc_err(slpc_to_guc(slpc), "Override RC mode %d failed: %pe\n", - mode, ERR_PTR(ret)); - } - - return ret; -} - -int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc) -{ - struct drm_i915_private *i915 = slpc_to_i915(slpc); - intel_wakeref_t wakeref; - int ret = 0; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE); - if (ret) - guc_err(slpc_to_guc(slpc), "Unsetting RC mode failed: %pe\n", ERR_PTR(ret)); - } - - return ret; -} - /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. @@ -755,6 +698,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) return ret; } + /* Set cached value of ignore efficient freq */ + intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq); + /* Revert SLPC min/max to softlimits if necessary */ ret = slpc_set_softlimits(slpc); if (unlikely(ret)) { @@ -765,9 +711,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Set cached media freq ratio mode */ intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode); - /* Set cached value of ignore efficient freq */ - intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq); - return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 597eb5413ddf..6ac6503c39d4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -44,8 +44,6 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val); void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); -int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc); -int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode); int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index a0e3ef1c65d2..ae3495a9c814 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1433,6 +1433,36 @@ static void guc_timestamp_ping(struct work_struct *wrk) int srcu, ret; /* + * Ideally the busyness worker should take a gt pm wakeref because the + * worker only needs to be active while gt is awake. However, the + * gt_park path cancels the worker synchronously and this complicates + * the flow if the worker is also running at the same time. The cancel + * waits for the worker and when the worker releases the wakeref, that + * would call gt_park and would lead to a deadlock. + * + * The resolution is to take the global pm wakeref if runtime pm is + * already active. If not, we don't need to update the busyness stats as + * the stats would already be updated when the gt was parked. + * + * Note: + * - We do not requeue the worker if we cannot take a reference to runtime + * pm since intel_guc_busyness_unpark would requeue the worker in the + * resume path. + * + * - If the gt was parked longer than time taken for GT timestamp to roll + * over, we ignore those rollovers since we don't care about tracking + * the exact GT time. We only care about roll overs when the gt is + * active and running workloads. + * + * - There is a window of time between gt_park and runtime suspend, + * where the worker may run. This is acceptable since the worker will + * not find any new data to update busyness. + */ + wakeref = intel_runtime_pm_get_if_active(>->i915->runtime_pm); + if (!wakeref) + return; + + /* * Synchronize with gt reset to make sure the worker does not * corrupt the engine/guc stats. NB: can't actually block waiting * for a reset to complete as the reset requires flushing out @@ -1440,10 +1470,9 @@ static void guc_timestamp_ping(struct work_struct *wrk) */ ret = intel_gt_reset_trylock(gt, &srcu); if (ret) - return; + goto err_trylock; - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) - __update_guc_busyness_stats(guc); + __update_guc_busyness_stats(guc); /* adjust context stats for overflow */ xa_for_each(&guc->context_lookup, index, ce) @@ -1452,6 +1481,9 @@ static void guc_timestamp_ping(struct work_struct *wrk) intel_gt_reset_unlock(gt, srcu); guc_enable_busyness_worker(guc); + +err_trylock: + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); } static int guc_action_enable_usage_stats(struct intel_guc *guc) @@ -1658,9 +1690,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine) * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) { + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { intel_engine_stop_cs(engine); intel_engine_wait_for_pending_mi_fw(engine); } @@ -4267,7 +4297,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) /* Wa_14014475959:dg2 */ if (engine->class == COMPUTE_CLASS) - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(engine->i915)) engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; @@ -5470,6 +5500,9 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, ve->base.flags = I915_ENGINE_IS_VIRTUAL; + BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES); + ve->base.mask = VIRTUAL_ENGINES; + intel_context_init(&ve->context, &ve->base); for (n = 0; n < count; n++) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index ddd146265beb..ba9e07fc2b57 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -26,6 +26,7 @@ * The kernel driver is only responsible for loading the HuC firmware and * triggering its security authentication. This is done differently depending * on the platform: + * * - older platforms (from Gen9 to most Gen12s): the load is performed via DMA * and the authentication via GuC * - DG2: load and authentication are both performed via GSC. @@ -33,6 +34,7 @@ * not-DG2 older platforms), while the authentication is done in 2-steps, * a first auth for clear-media workloads via GuC and a second one for all * workloads via GSC. + * * On platforms where the GuC does the authentication, to correctly do so the * HuC binary must be loaded before the GuC one. * Loading the HuC is optional; however, not using the HuC might negatively @@ -265,7 +267,7 @@ static bool vcs_supported(struct intel_gt *gt) GEM_BUG_ON(!gt_is_root(gt) && !gt->info.engine_mask); if (gt_is_root(gt)) - mask = RUNTIME_INFO(gt->i915)->platform_engine_mask; + mask = INTEL_INFO(gt->i915)->platform_engine_mask; else mask = gt->info.engine_mask; @@ -308,9 +310,9 @@ void intel_huc_init_early(struct intel_huc *huc) huc->status[INTEL_HUC_AUTH_BY_GSC].mask = HUC_LOAD_SUCCESSFUL; huc->status[INTEL_HUC_AUTH_BY_GSC].value = HUC_LOAD_SUCCESSFUL; } else { - huc->status[INTEL_HUC_AUTH_BY_GSC].reg = HECI_FWSTS5(MTL_GSC_HECI1_BASE); - huc->status[INTEL_HUC_AUTH_BY_GSC].mask = HECI_FWSTS5_HUC_AUTH_DONE; - huc->status[INTEL_HUC_AUTH_BY_GSC].value = HECI_FWSTS5_HUC_AUTH_DONE; + huc->status[INTEL_HUC_AUTH_BY_GSC].reg = HECI_FWSTS(MTL_GSC_HECI1_BASE, 5); + huc->status[INTEL_HUC_AUTH_BY_GSC].mask = HECI1_FWSTS5_HUC_AUTH_DONE; + huc->status[INTEL_HUC_AUTH_BY_GSC].value = HECI1_FWSTS5_HUC_AUTH_DONE; } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index e608152fecfc..b648238cc675 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -27,7 +27,6 @@ struct mtl_huc_auth_msg_out { int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc) { struct intel_gt *gt = huc_to_gt(huc); - struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; struct mtl_huc_auth_msg_in *msg_in; struct mtl_huc_auth_msg_out *msg_out; @@ -43,7 +42,7 @@ int intel_huc_fw_auth_via_gsccs(struct intel_huc *huc) pkt_offset = i915_ggtt_offset(huc->heci_pkt); pkt_vaddr = i915_gem_object_pin_map_unlocked(obj, - i915_coherent_map_type(i915, obj, true)); + intel_gt_coherent_map_type(gt, obj, true)); if (IS_ERR(pkt_vaddr)) return PTR_ERR(pkt_vaddr); @@ -107,15 +106,6 @@ out_unpin: return err; } -static void get_version_from_gsc_manifest(struct intel_uc_fw_ver *ver, const void *data) -{ - const struct intel_gsc_manifest_header *manifest = data; - - ver->major = manifest->fw_version.major; - ver->minor = manifest->fw_version.minor; - ver->patch = manifest->fw_version.hotfix; -} - static bool css_valid(const void *data, size_t size) { const struct uc_css_header *css = data; @@ -227,8 +217,8 @@ int intel_huc_fw_get_binary_info(struct intel_uc_fw *huc_fw, const void *data, s for (i = 0; i < header->num_of_entries; i++, entry++) { if (strcmp(entry->name, "HUCP.man") == 0) - get_version_from_gsc_manifest(&huc_fw->file_selected.ver, - data + entry_offset(entry)); + intel_uc_fw_version_from_gsc_manifest(&huc_fw->file_selected.ver, + data + entry_offset(entry)); if (strcmp(entry->name, "huc_fw") == 0) { u32 offset = entry_offset(entry); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 18250fb64bd8..98b103375b7a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -43,7 +43,7 @@ static void uc_expand_default_options(struct intel_uc *uc) } /* Intermediate platforms are HuC authentication only */ - if (IS_ALDERLAKE_S(i915) && !IS_ADLS_RPLS(i915)) { + if (IS_ALDERLAKE_S(i915) && !IS_RAPTORLAKE_S(i915)) { i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; return; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c index 2f93cc4e408a..6d541c866edb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c @@ -10,6 +10,7 @@ #include "gt/intel_gt_debugfs.h" #include "intel_guc_debugfs.h" +#include "intel_gsc_uc_debugfs.h" #include "intel_huc_debugfs.h" #include "intel_uc.h" #include "intel_uc_debugfs.h" @@ -58,6 +59,7 @@ void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root) intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), uc); + intel_gsc_uc_debugfs_register(&uc->gsc, root); intel_guc_debugfs_register(&uc->guc, root); intel_huc_debugfs_register(&uc->huc, root); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 944725e62414..32e27e9a2490 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -11,7 +11,10 @@ #include <drm/drm_print.h> #include "gem/i915_gem_lmem.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_print.h" +#include "intel_gsc_binary_headers.h" +#include "intel_gsc_fw.h" #include "intel_uc_fw.h" #include "intel_uc_fw_abi.h" #include "i915_drv.h" @@ -129,6 +132,17 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(SKYLAKE, 0, huc_mmp(skl, 2, 0, 0)) /* + * The GSC FW has multiple version (see intel_gsc_uc.h for details); since what + * we care about is the interface, we use the compatibility version in the + * binary names. + * Same as with the GuC, a major version bump indicate a + * backward-incompatible change, while a minor version bump indicates a + * backward-compatible one, so we use only the former in the file name. + */ +#define INTEL_GSC_FIRMWARE_DEFS(fw_def, gsc_def) \ + fw_def(METEORLAKE, 0, gsc_def(mtl, 1, 0)) + +/* * Set of macros for producing a list of filenames from the above table. */ #define __MAKE_UC_FW_PATH_BLANK(prefix_, name_) \ @@ -163,6 +177,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, #define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ __MAKE_UC_FW_PATH_MMP(prefix_, "huc", major_, minor_, patch_) +#define MAKE_GSC_FW_PATH(prefix_, major_, minor_) \ + __MAKE_UC_FW_PATH_MAJOR(prefix_, "gsc", major_) + /* * All blobs need to be declared via MODULE_FIRMWARE(). * This first expansion of the table macros is solely to provide @@ -173,6 +190,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP) INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC) +INTEL_GSC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GSC_FW_PATH) /* * The next expansion of the table macros (in __uc_fw_auto_select below) provides @@ -222,6 +240,10 @@ struct __packed uc_fw_blob { #define HUC_FW_BLOB_GSC(prefix_) \ UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_)) +#define GSC_FW_BLOB(prefix_, major_, minor_) \ + UC_FW_BLOB_NEW(major_, minor_, 0, true, \ + MAKE_GSC_FW_PATH(prefix_, major_, minor_)) + struct __packed uc_fw_platform_requirement { enum intel_platform p; u8 rev; /* first platform rev using this FW */ @@ -248,9 +270,14 @@ static const struct uc_fw_platform_requirement blobs_huc[] = { INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC) }; +static const struct uc_fw_platform_requirement blobs_gsc[] = { + INTEL_GSC_FIRMWARE_DEFS(MAKE_FW_LIST, GSC_FW_BLOB) +}; + static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = { [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, + [INTEL_UC_FW_TYPE_GSC] = { blobs_gsc, ARRAY_SIZE(blobs_gsc) }, }; static void @@ -264,20 +291,12 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) bool found; /* - * GSC FW support is still not fully in place, so we're not defining - * the FW blob yet because we don't want the driver to attempt to load - * it until we're ready for it. - */ - if (uc_fw->type == INTEL_UC_FW_TYPE_GSC) - return; - - /* * The only difference between the ADL GuC FWs is the HWConfig support. * ADL-N does not support HWConfig, so we should use the same binary as * ADL-S, otherwise the GuC might attempt to fetch a config table that * does not exist. */ - if (IS_ADLP_N(i915)) + if (IS_ALDERLAKE_P_N(i915)) p = INTEL_ALDERLAKE_S; GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); @@ -468,6 +487,17 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc } } +void intel_uc_fw_version_from_gsc_manifest(struct intel_uc_fw_ver *ver, + const void *data) +{ + const struct intel_gsc_manifest_header *manifest = data; + + ver->major = manifest->fw_version.major; + ver->minor = manifest->fw_version.minor; + ver->patch = manifest->fw_version.hotfix; + ver->build = manifest->fw_version.build; +} + /** * intel_uc_fw_init_early - initialize the uC object and select the firmware * @uc_fw: uC firmware @@ -668,13 +698,18 @@ static int check_gsc_manifest(struct intel_gt *gt, const struct firmware *fw, struct intel_uc_fw *uc_fw) { - if (uc_fw->type != INTEL_UC_FW_TYPE_HUC) { - gt_err(gt, "trying to GSC-parse a non-HuC binary"); + switch (uc_fw->type) { + case INTEL_UC_FW_TYPE_HUC: + intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + break; + case INTEL_UC_FW_TYPE_GSC: + intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + break; + default: + MISSING_CASE(uc_fw->type); return -EINVAL; } - intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); - if (uc_fw->dma_start_offset) { u32 delta = uc_fw->dma_start_offset; @@ -734,10 +769,6 @@ static int check_fw_header(struct intel_gt *gt, { int err = 0; - /* GSC FW version is queried after the FW is loaded */ - if (uc_fw->type == INTEL_UC_FW_TYPE_GSC) - return 0; - if (uc_fw->has_gsc_headers) err = check_gsc_manifest(gt, fw, uc_fw); else @@ -773,6 +804,80 @@ static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware ** return 0; } +static int check_mtl_huc_guc_compatibility(struct intel_gt *gt, + struct intel_uc_fw_file *huc_selected) +{ + struct intel_uc_fw_file *guc_selected = >->uc.guc.fw.file_selected; + struct intel_uc_fw_ver *huc_ver = &huc_selected->ver; + struct intel_uc_fw_ver *guc_ver = &guc_selected->ver; + bool new_huc, new_guc; + + /* we can only do this check after having fetched both GuC and HuC */ + GEM_BUG_ON(!huc_selected->path || !guc_selected->path); + + /* + * Due to changes in the authentication flow for MTL, HuC 8.5.1 or newer + * requires GuC 70.7.0 or newer. Older HuC binaries will instead require + * GuC < 70.7.0. + */ + new_huc = huc_ver->major > 8 || + (huc_ver->major == 8 && huc_ver->minor > 5) || + (huc_ver->major == 8 && huc_ver->minor == 5 && huc_ver->patch >= 1); + + new_guc = guc_ver->major > 70 || + (guc_ver->major == 70 && guc_ver->minor >= 7); + + if (new_huc != new_guc) { + UNEXPECTED(gt, "HuC %u.%u.%u is incompatible with GuC %u.%u.%u\n", + huc_ver->major, huc_ver->minor, huc_ver->patch, + guc_ver->major, guc_ver->minor, guc_ver->patch); + gt_info(gt, "MTL GuC 70.7.0+ and HuC 8.5.1+ don't work with older releases\n"); + return -ENOEXEC; + } + + return 0; +} + +int intel_uc_check_file_version(struct intel_uc_fw *uc_fw, bool *old_ver) +{ + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); + struct intel_uc_fw_file *wanted = &uc_fw->file_wanted; + struct intel_uc_fw_file *selected = &uc_fw->file_selected; + int ret; + + /* + * MTL has some compatibility issues with early GuC/HuC binaries + * not working with newer ones. This is specific to MTL and we + * don't expect it to extend to other platforms. + */ + if (IS_METEORLAKE(gt->i915) && uc_fw->type == INTEL_UC_FW_TYPE_HUC) { + ret = check_mtl_huc_guc_compatibility(gt, selected); + if (ret) + return ret; + } + + if (!wanted->ver.major || !selected->ver.major) + return 0; + + /* Check the file's major version was as it claimed */ + if (selected->ver.major != wanted->ver.major) { + UNEXPECTED(gt, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + intel_uc_fw_type_repr(uc_fw->type), selected->path, + selected->ver.major, selected->ver.minor, + wanted->ver.major, wanted->ver.minor); + if (!intel_uc_fw_is_overridden(uc_fw)) + return -ENOEXEC; + } else if (old_ver) { + if (selected->ver.minor < wanted->ver.minor) + *old_ver = true; + else if ((selected->ver.minor == wanted->ver.minor) && + (selected->ver.patch < wanted->ver.patch)) + *old_ver = true; + } + + return 0; +} + /** * intel_uc_fw_fetch - fetch uC firmware * @uc_fw: uC firmware @@ -840,25 +945,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) goto fail; } - if (uc_fw->file_wanted.ver.major && uc_fw->file_selected.ver.major) { - /* Check the file's major version was as it claimed */ - if (uc_fw->file_selected.ver.major != uc_fw->file_wanted.ver.major) { - UNEXPECTED(gt, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, - uc_fw->file_selected.ver.major, uc_fw->file_selected.ver.minor, - uc_fw->file_wanted.ver.major, uc_fw->file_wanted.ver.minor); - if (!intel_uc_fw_is_overridden(uc_fw)) { - err = -ENOEXEC; - goto fail; - } - } else { - if (uc_fw->file_selected.ver.minor < uc_fw->file_wanted.ver.minor) - old_ver = true; - else if ((uc_fw->file_selected.ver.minor == uc_fw->file_wanted.ver.minor) && - (uc_fw->file_selected.ver.patch < uc_fw->file_wanted.ver.patch)) - old_ver = true; - } - } + err = intel_uc_check_file_version(uc_fw, &old_ver); + if (err) + goto fail; if (old_ver && uc_fw->file_selected.ver.major) { /* Preserve the version that was really wanted */ @@ -1125,7 +1214,7 @@ static int uc_fw_rsa_data_create(struct intel_uc_fw *uc_fw) return PTR_ERR(vma); vaddr = i915_gem_object_pin_map_unlocked(vma->obj, - i915_coherent_map_type(gt->i915, vma->obj, true)); + intel_gt_coherent_map_type(gt, vma->obj, true)); if (IS_ERR(vaddr)) { i915_vma_unpin_and_release(&vma, 0); err = PTR_ERR(vaddr); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 054f02811971..9a431726c8d5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -70,6 +70,7 @@ struct intel_uc_fw_ver { u32 major; u32 minor; u32 patch; + u32 build; }; /* @@ -289,6 +290,9 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) return __intel_uc_fw_get_upload_size(uc_fw); } +void intel_uc_fw_version_from_gsc_manifest(struct intel_uc_fw_ver *ver, + const void *data); +int intel_uc_check_file_version(struct intel_uc_fw *uc_fw, bool *old_ver); void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type, bool needs_ggtt_mapping); diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index 1fd760539f77..bfb72143566f 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -204,9 +204,9 @@ static int intel_guc_steal_guc_ids(void *arg) if (IS_ERR(rq)) { ret = PTR_ERR(rq); rq = NULL; - if (ret != -EAGAIN) { - guc_err(guc, "Failed to create request %d: %pe\n", - context_index, ERR_PTR(ret)); + if ((ret != -EAGAIN) || !last) { + guc_err(guc, "Failed to create %srequest %d: %pe\n", + last ? "" : "first ", context_index, ERR_PTR(ret)); goto err_spin_rq; } } else { |