diff options
author | Dave Airlie <airlied@redhat.com> | 2024-02-16 11:19:14 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2024-02-16 11:19:15 +1000 |
commit | 6f167a3673463c2b1733ff04fada65346bbc772b (patch) | |
tree | 2182d1cc143fddda5268c475599a08e87699e2c6 | |
parent | b13cfb445c91837db967148222374f591fbdb20c (diff) | |
parent | eb927f01dfb6309c8a184593c2c0618c4000c481 (diff) | |
download | linux-6f167a3673463c2b1733ff04fada65346bbc772b.tar.gz linux-6f167a3673463c2b1733ff04fada65346bbc772b.tar.bz2 linux-6f167a3673463c2b1733ff04fada65346bbc772b.zip |
Merge tag 'drm-intel-gt-next-2024-02-15' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:
- Add GuC submission interface version query (Tvrtko Ursulin)
Driver Changes:
Fixes/improvements/new stuff:
- Atomically invalidate userptr on mmu-notifier (Jonathan Cavitt)
- Update handling of MMIO triggered reports (Umesh Nerlige Ramappa)
- Don't make assumptions about intel_wakeref_t type (Jani Nikula)
- Add workaround 14019877138 [xelpg] (Tejas Upadhyay)
- Allow for very slow HuC loading [huc] (John Harrison)
- Flush context destruction worker at suspend [guc] (Alan Previn)
- Close deregister-context race against CT-loss [guc] (Alan Previn)
- Avoid circular locking issue on busyness flush [guc] (John Harrison)
- Use rc6.supported flag from intel_gt for rc6_enable sysfs (Juan Escamilla)
- Reflect the true and current status of rc6_enable (Juan Escamilla)
- Wake GT before sending H2G message [mtl] (Vinay Belgaumkar)
- Restart the heartbeat timer when forcing a pulse (John Harrison)
Future platform enablement:
- Extend driver code of Xe_LPG to Xe_LPG+ [xelpg] (Harish Chegondi)
- Extend some workarounds/tuning to gfx version 12.74 [xelpg] (Matt Roper)
Miscellaneous:
- Reconcile Excess struct member kernel-doc warnings (Randy Dunlap)
- Change wa and EU_PERF_CNTL registers to MCR type [guc] (Shuicheng Lin)
- Add flex arrays to struct i915_syncmap (Erick Archer)
- Increasing the sleep time for live_rc6_manual [selftests] (Anirban Sk)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Zc3iIVsiAwo+bu10@tursulin-desk
26 files changed, 301 insertions, 149 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 555022c0652c..d3a771afb083 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2160,12 +2160,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) #ifdef CONFIG_MMU_NOTIFIER if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) { - read_lock(&eb->i915->mm.notifier_lock); - - /* - * count is always at least 1, otherwise __EXEC_USERPTR_USED - * could not have been set - */ for (i = 0; i < count; i++) { struct eb_vma *ev = &eb->vma[i]; struct drm_i915_gem_object *obj = ev->vma->obj; @@ -2177,8 +2171,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) if (err) break; } - - read_unlock(&eb->i915->mm.notifier_lock); } #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 0d812f4d787d..3b27218aabe2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -28,6 +28,13 @@ void i915_gem_suspend(struct drm_i915_private *i915) GEM_TRACE("%s\n", dev_name(i915->drm.dev)); intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, 0); + /* + * On rare occasions, we've observed the fence completion triggers + * free_engines asynchronously via rcu_call. Ensure those are done. + * This path is only called on suspend, so it's an acceptable cost. + */ + rcu_barrier(); + flush_workqueue(i915->wq); /* @@ -160,6 +167,9 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) * machine in an unusable condition. */ + /* Like i915_gem_suspend, flush tasks staged from fence triggers */ + rcu_barrier(); + for_each_gt(gt, i915, i) intel_gt_suspend_late(gt); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 1d3ebdf4069b..0e21ce9d3e5a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -42,7 +42,6 @@ #include "i915_drv.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" -#include "i915_gem_userptr.h" #include "i915_scatterlist.h" #ifdef CONFIG_MMU_NOTIFIER @@ -61,36 +60,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, unsigned long cur_seq) { - struct drm_i915_gem_object *obj = container_of(mni, struct drm_i915_gem_object, userptr.notifier); - struct drm_i915_private *i915 = to_i915(obj->base.dev); - long r; - - if (!mmu_notifier_range_blockable(range)) - return false; - - write_lock(&i915->mm.notifier_lock); - mmu_interval_set_seq(mni, cur_seq); - - write_unlock(&i915->mm.notifier_lock); - - /* - * We don't wait when the process is exiting. This is valid - * because the object will be cleaned up anyway. - * - * This is also temporarily required as a hack, because we - * cannot currently force non-consistent batch buffers to preempt - * and reschedule by waiting on it, hanging processes on exit. - */ - if (current->flags & PF_EXITING) - return true; - - /* we will unbind on next submission, still have userptr pins */ - r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP, false, - MAX_SCHEDULE_TIMEOUT); - if (r <= 0) - drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r); - return true; } @@ -580,15 +550,3 @@ i915_gem_userptr_ioctl(struct drm_device *dev, #endif } -int i915_gem_init_userptr(struct drm_i915_private *dev_priv) -{ -#ifdef CONFIG_MMU_NOTIFIER - rwlock_init(&dev_priv->mm.notifier_lock); -#endif - - return 0; -} - -void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv) -{ -} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.h b/drivers/gpu/drm/i915/gem/i915_gem_userptr.h deleted file mode 100644 index 8dadb2f8436d..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2021 Intel Corporation - */ - -#ifndef __I915_GEM_USERPTR_H__ -#define __I915_GEM_USERPTR_H__ - -struct drm_i915_private; - -int i915_gem_init_userptr(struct drm_i915_private *dev_priv); -void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); - -#endif /* __I915_GEM_USERPTR_H__ */ diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 86a04afff64b..e1bf13e3d307 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -226,7 +226,7 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) static int mtl_dummy_pipe_control(struct i915_request *rq) { /* Wa_14016712196 */ - if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) || + if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(rq->i915)) { u32 *cs; @@ -822,7 +822,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) flags |= PIPE_CONTROL_FLUSH_L3; /* Wa_14016712196 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) /* dummy PIPE_CONTROL + depth flush */ cs = gen12_emit_pipe_control(cs, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 40687806d22a..1ade568ffbfa 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1190,7 +1190,8 @@ static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine) num = ARRAY_SIZE(xelpmp_regs); } } else { - if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) || + if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 74) || + GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) || GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) || GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) || GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 1a8e2b7db013..4ae2fa0b61dd 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -290,6 +290,9 @@ static int __intel_engine_pulse(struct intel_engine_cs *engine) heartbeat_commit(rq, &attr); GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER); + /* Ensure the forced pulse gets a full period to execute */ + next_heartbeat(engine); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index f0dea54880af..c0b202223940 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -176,27 +176,13 @@ static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id) return DIV_ROUND_CLOSEST_ULL(res, 1000); } -static u8 get_rc6_mask(struct intel_gt *gt) -{ - u8 mask = 0; - - if (HAS_RC6(gt->i915)) - mask |= BIT(0); - if (HAS_RC6p(gt->i915)) - mask |= BIT(1); - if (HAS_RC6pp(gt->i915)) - mask |= BIT(2); - - return mask; -} - static ssize_t rc6_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buff) { struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); - return sysfs_emit(buff, "%x\n", get_rc6_mask(gt)); + return sysfs_emit(buff, "%x\n", gt->rc6.enabled); } static ssize_t rc6_enable_dev_show(struct device *dev, @@ -205,7 +191,7 @@ static ssize_t rc6_enable_dev_show(struct device *dev, { struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name); - return sysfs_emit(buff, "%x\n", get_rc6_mask(gt)); + return sysfs_emit(buff, "%x\n", gt->rc6.enabled); } static u32 __rc6_residency_ms_show(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 353f93baaca0..25c1023eb5f9 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -495,7 +495,7 @@ static unsigned int get_mocs_settings(struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) { + if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 74))) { table->size = ARRAY_SIZE(mtl_mocs_table); table->table = mtl_mocs_table; table->n_entries = MTL_NUM_MOCS_ENTRIES; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 7090e4be29cb..8f4b3c8af09c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -123,7 +123,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) * temporary wa and should be removed after fixing real cause * of forcewake timeouts. */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) pg_enable = GEN9_MEDIA_PG_ENABLE | GEN11_MEDIA_SAMPLER_PG_ENABLE; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3eacbc50caf8..91814e3abd5c 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -789,8 +789,13 @@ static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, dg2_ctx_gt_tuning_init(engine, wal); - if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || - IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) + /* + * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in + * gen12_emit_indirect_ctx_rcs() rather than here on some early + * steppings. + */ + if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))) wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); } @@ -820,6 +825,9 @@ static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_18019271663 */ wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); + + /* Wa_14019877138 */ + wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT); } static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, @@ -908,7 +916,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_ctx_workarounds_init(engine, wal); else if (IS_PONTEVECCHIO(i915)) ; /* noop; none at this time */ @@ -1643,7 +1651,7 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - /* Wa_14018778641 / Wa_18018781329 */ + /* Wa_14018575942 / Wa_18018781329 */ wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_22016670082 */ @@ -1710,7 +1718,7 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); } @@ -1743,7 +1751,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) return; } - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_gt_workarounds_init(gt, wal); else if (IS_PONTEVECCHIO(i915)) pvc_gt_workarounds_init(gt, wal); @@ -2216,7 +2224,7 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) if (engine->gt->type == GT_MEDIA) ; /* none yet */ - else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_whitelist_build(engine); else if (IS_PONTEVECCHIO(i915)) pvc_whitelist_build(engine); @@ -2828,7 +2836,7 @@ add_render_compute_tuning_settings(struct intel_gt *gt, { struct drm_i915_private *i915 = gt->i915; - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* @@ -2881,7 +2889,8 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li } if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || - IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) /* Wa_14017856879 */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index a7189c2d660c..1aa1446c8fb0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -62,12 +62,12 @@ int live_rc6_manual(void *arg) dt = ktime_get(); rc0_power = librapl_energy_uJ(); - msleep(250); + msleep(1000); rc0_power = librapl_energy_uJ() - rc0_power; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); if ((res[1] - res[0]) >> 10) { - pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n", + pr_err("RC6 residency increased by %lldus while disabled for 1000ms!\n", (res[1] - res[0]) >> 10); err = -EINVAL; goto out_unlock; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 63724e17829a..f7372f736a77 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -377,8 +377,13 @@ static int guc_mmio_regset_init(struct temp_regset *regset, CCS_MASK(engine->gt)) ret |= GUC_MMIO_REG_ADD(gt, regset, GEN12_RCU_MODE, true); + /* + * some of the WA registers are MCR registers. As it is safe to + * use MCR form for non-MCR registers, for code simplicity, all + * WA registers are added with MCR form. + */ for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - ret |= GUC_MMIO_REG_ADD(gt, regset, wa->reg, wa->masked_reg); + ret |= GUC_MCR_REG_ADD(gt, regset, wa->mcr_reg, wa->masked_reg); /* Be extra paranoid and include all whitelist registers. */ for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) @@ -394,13 +399,13 @@ static int guc_mmio_regset_init(struct temp_regset *regset, ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false); if (GRAPHICS_VER(engine->i915) >= 12) { - ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false); - ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false); - ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false); - ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false); - ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false); - ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false); - ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false); + ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL0)), false); + ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL1)), false); + ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL2)), false); + ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL3)), false); + ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL4)), false); + ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL5)), false); + ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL6)), false); } return ret ? -1 : 0; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 0f79cb658518..52332bb14339 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -184,7 +184,7 @@ static int guc_wait_ucode(struct intel_guc *guc) * in the seconds range. However, there is a limit on how long an * individual wait_for() can wait. So wrap it in a loop. */ - before_freq = intel_rps_read_actual_frequency(&uncore->gt->rps); + before_freq = intel_rps_read_actual_frequency(>->rps); before = ktime_get(); for (count = 0; count < GUC_LOAD_RETRY_LIMIT; count++) { ret = wait_for(guc_load_done(uncore, &status, &success), 1000); @@ -192,7 +192,7 @@ static int guc_wait_ucode(struct intel_guc *guc) break; guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz, status = 0x%08X [0x%02X/%02X]\n", - count, intel_rps_read_actual_frequency(&uncore->gt->rps), status, + count, intel_rps_read_actual_frequency(>->rps), status, REG_FIELD_GET(GS_BOOTROM_MASK, status), REG_FIELD_GET(GS_UKERNEL_MASK, status)); } @@ -204,7 +204,7 @@ static int guc_wait_ucode(struct intel_guc *guc) u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status); guc_info(guc, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz, ret = %d\n", - status, delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), ret); + status, delta_ms, intel_rps_read_actual_frequency(>->rps), ret); guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", REG_FIELD_GET(GS_MIA_IN_RESET, status), bootrom, ukernel, @@ -254,11 +254,11 @@ static int guc_wait_ucode(struct intel_guc *guc) guc_warn(guc, "excessive init time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n", delta_ms, status, count, ret); guc_warn(guc, "excessive init time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n", - intel_rps_read_actual_frequency(&uncore->gt->rps), before_freq, + intel_rps_read_actual_frequency(>->rps), before_freq, intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); } else { guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", - delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), + delta_ms, intel_rps_read_actual_frequency(>->rps), before_freq, status, count, ret); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index a259f1118c5a..f3dcae4b9d45 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -236,6 +236,13 @@ set_context_destroyed(struct intel_context *ce) ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; } +static inline void +clr_context_destroyed(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED; +} + static inline bool context_pending_disable(struct intel_context *ce) { return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; @@ -613,6 +620,8 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, u32 g2h_len_dw, bool loop) { + int ret; + /* * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), * so we don't handle the case where we don't get a reply because we @@ -623,7 +632,11 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, if (g2h_len_dw) atomic_inc(&guc->outstanding_submission_g2h); - return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); + ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); + if (ret) + atomic_dec(&guc->outstanding_submission_g2h); + + return ret; } int intel_guc_wait_for_pending_msg(struct intel_guc *guc, @@ -1362,7 +1375,45 @@ static void guc_enable_busyness_worker(struct intel_guc *guc) static void guc_cancel_busyness_worker(struct intel_guc *guc) { - cancel_delayed_work_sync(&guc->timestamp.work); + /* + * There are many different call stacks that can get here. Some of them + * hold the reset mutex. The busyness worker also attempts to acquire the + * reset mutex. Synchronously flushing a worker thread requires acquiring + * the worker mutex. Lockdep sees this as a conflict. It thinks that the + * flush can deadlock because it holds the worker mutex while waiting for + * the reset mutex, but another thread is holding the reset mutex and might + * attempt to use other worker functions. + * + * In practice, this scenario does not exist because the busyness worker + * does not block waiting for the reset mutex. It does a try-lock on it and + * immediately exits if the lock is already held. Unfortunately, the mutex + * in question (I915_RESET_BACKOFF) is an i915 implementation which has lockdep + * annotation but not to the extent of explaining the 'might lock' is also a + * 'does not need to lock'. So one option would be to add more complex lockdep + * annotations to ignore the issue (if at all possible). A simpler option is to + * just not flush synchronously when a rest in progress. Given that the worker + * will just early exit and re-schedule itself anyway, there is no advantage + * to running it immediately. + * + * If a reset is not in progress, then the synchronous flush may be required. + * As noted many call stacks lead here, some during suspend and driver unload + * which do require a synchronous flush to make sure the worker is stopped + * before memory is freed. + * + * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through + * every possible call stack is unfeasible. It would be too intrusive to many + * areas that really don't care about the GuC backend. However, there is the + * 'reset_in_progress' flag available, so just use that. + * + * And note that in the case of a reset occurring during driver unload + * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set + * is fine because there is another cancel in _finish (when the reset flag is + * not). + */ + if (guc_to_gt(guc)->uc.reset_in_progress) + cancel_delayed_work(&guc->timestamp.work); + else + cancel_delayed_work_sync(&guc->timestamp.work); } static void __reset_guc_busyness_stats(struct intel_guc *guc) @@ -1613,6 +1664,11 @@ static void guc_flush_submissions(struct intel_guc *guc) spin_unlock_irqrestore(&sched_engine->lock, flags); } +void intel_guc_submission_flush_work(struct intel_guc *guc) +{ + flush_work(&guc->submission_state.destroyed_worker); +} + static void guc_flush_destroyed_contexts(struct intel_guc *guc); void intel_guc_submission_reset_prepare(struct intel_guc *guc) @@ -1948,8 +2004,16 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc) void intel_guc_submission_reset_finish(struct intel_guc *guc) { + /* + * Ensure the busyness worker gets cancelled even on a fatal wedge. + * Note that reset_prepare is not allowed to because it confuses lockdep. + */ + if (guc_submission_initialized(guc)) + guc_cancel_busyness_worker(guc); + /* Reset called during driver load or during wedge? */ if (unlikely(!guc_submission_initialized(guc) || + !intel_guc_is_fw_running(guc) || intel_gt_is_wedged(guc_to_gt(guc)))) { return; } @@ -3283,12 +3347,13 @@ static void guc_context_close(struct intel_context *ce) spin_unlock_irqrestore(&ce->guc_state.lock, flags); } -static inline void guc_lrc_desc_unpin(struct intel_context *ce) +static inline int guc_lrc_desc_unpin(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); struct intel_gt *gt = guc_to_gt(guc); unsigned long flags; bool disabled; + int ret; GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); @@ -3299,18 +3364,41 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) spin_lock_irqsave(&ce->guc_state.lock, flags); disabled = submission_disabled(guc); if (likely(!disabled)) { + /* + * Take a gt-pm ref and change context state to be destroyed. + * NOTE: a G2H IRQ that comes after will put this gt-pm ref back + */ __intel_gt_pm_get(gt); set_context_destroyed(ce); clr_context_registered(ce); } spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { release_guc_id(guc, ce); __guc_context_destroy(ce); - return; + return 0; } - deregister_context(ce, ce->guc_id.id); + /* + * GuC is active, lets destroy this context, but at this point we can still be racing + * with suspend, so we undo everything if the H2G fails in deregister_context so + * that GuC reset will find this context during clean up. + */ + ret = deregister_context(ce, ce->guc_id.id); + if (ret) { + spin_lock(&ce->guc_state.lock); + set_context_registered(ce); + clr_context_destroyed(ce); + spin_unlock(&ce->guc_state.lock); + /* + * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements + * the wakeref immediately but per function spec usage call this after unlock. + */ + intel_wakeref_put_async(>->wakeref); + } + + return ret; } static void __guc_context_destroy(struct intel_context *ce) @@ -3378,7 +3466,22 @@ static void deregister_destroyed_contexts(struct intel_guc *guc) if (!ce) break; - guc_lrc_desc_unpin(ce); + if (guc_lrc_desc_unpin(ce)) { + /* + * This means GuC's CT link severed mid-way which could happen + * in suspend-resume corner cases. In this case, put the + * context back into the destroyed_contexts list which will + * get picked up on the next context deregistration event or + * purged in a GuC sanitization event (reset/unload/wedged/...). + */ + spin_lock_irqsave(&guc->submission_state.lock, flags); + list_add_tail(&ce->destroyed_link, + &guc->submission_state.destroyed_contexts); + spin_unlock_irqrestore(&guc->submission_state.lock, flags); + /* Bail now since the list might never be emptied if h2gs fail */ + break; + } + } } @@ -3389,6 +3492,17 @@ static void destroyed_worker_func(struct work_struct *w) struct intel_gt *gt = guc_to_gt(guc); intel_wakeref_t wakeref; + /* + * In rare cases we can get here via async context-free fence-signals that + * come very late in suspend flow or very early in resume flows. In these + * cases, GuC won't be ready but just skipping it here is fine as these + * pending-destroy-contexts get destroyed totally at GuC reset time at the + * end of suspend.. OR.. this worker can be picked up later on the next + * context destruction trigger after resume-completes + */ + if (!intel_guc_is_ready(guc)) + return; + with_intel_gt_pm(gt, wakeref) deregister_destroyed_contexts(guc); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index c57b29cdb1a6..b6df75622d3b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -38,6 +38,8 @@ int intel_guc_wait_for_pending_msg(struct intel_guc *guc, bool interruptible, long timeout); +void intel_guc_submission_flush_work(struct intel_guc *guc); + static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) { return guc->submission_supported; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index ba9e07fc2b57..0945b177d5f9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -6,6 +6,7 @@ #include <linux/types.h> #include "gt/intel_gt.h" +#include "gt/intel_rps.h" #include "intel_guc_reg.h" #include "intel_huc.h" #include "intel_huc_print.h" @@ -447,17 +448,68 @@ static const char *auth_mode_string(struct intel_huc *huc, return partial ? "clear media" : "all workloads"; } +/* + * Use a longer timeout for debug builds so that problems can be detected + * and analysed. But a shorter timeout for releases so that user's don't + * wait forever to find out there is a problem. Note that the only reason + * an end user should hit the timeout is in case of extreme thermal throttling. + * And a system that is that hot during boot is probably dead anyway! + */ +#if defined(CONFIG_DRM_I915_DEBUG_GEM) +#define HUC_LOAD_RETRY_LIMIT 20 +#else +#define HUC_LOAD_RETRY_LIMIT 3 +#endif + int intel_huc_wait_for_auth_complete(struct intel_huc *huc, enum intel_huc_authentication_type type) { struct intel_gt *gt = huc_to_gt(huc); - int ret; + struct intel_uncore *uncore = gt->uncore; + ktime_t before, after, delta; + int ret, count; + u64 delta_ms; + u32 before_freq; - ret = __intel_wait_for_register(gt->uncore, - huc->status[type].reg, - huc->status[type].mask, - huc->status[type].value, - 2, 50, NULL); + /* + * The KMD requests maximum frequency during driver load, however thermal + * throttling can force the frequency down to minimum (although the board + * really should never get that hot in real life!). IFWI issues have been + * seen to cause sporadic failures to grant the higher frequency. And at + * minimum frequency, the authentication time can be in the seconds range. + * Note that there is a limit on how long an individual wait_for() can wait. + * So wrap it in a loop. + */ + before_freq = intel_rps_read_actual_frequency(>->rps); + before = ktime_get(); + for (count = 0; count < HUC_LOAD_RETRY_LIMIT; count++) { + ret = __intel_wait_for_register(gt->uncore, + huc->status[type].reg, + huc->status[type].mask, + huc->status[type].value, + 2, 1000, NULL); + if (!ret) + break; + + huc_dbg(huc, "auth still in progress, count = %d, freq = %dMHz, status = 0x%08X\n", + count, intel_rps_read_actual_frequency(>->rps), + huc->status[type].reg.reg); + } + after = ktime_get(); + delta = ktime_sub(after, before); + delta_ms = ktime_to_ms(delta); + + if (delta_ms > 50) { + huc_warn(huc, "excessive auth time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n", + delta_ms, huc->status[type].reg.reg, count, ret); + huc_warn(huc, "excessive auth time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n", + intel_rps_read_actual_frequency(>->rps), before_freq, + intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); + } else { + huc_dbg(huc, "auth took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", + delta_ms, intel_rps_read_actual_frequency(>->rps), + before_freq, huc->status[type].reg.reg, count, ret); + } /* mark the load process as complete even if the wait failed */ delayed_huc_load_complete(huc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 3872d309ed31..6dfe5d9456c6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -640,7 +640,7 @@ void intel_uc_reset_finish(struct intel_uc *uc) uc->reset_in_progress = false; /* Firmware expected to be running when this function is called */ - if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc)) + if (intel_uc_uses_guc_submission(uc)) intel_guc_submission_reset_finish(guc); } @@ -690,6 +690,8 @@ void intel_uc_suspend(struct intel_uc *uc) return; } + intel_guc_submission_flush_work(guc); + with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) { err = intel_guc_suspend(guc); if (err) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index db99c2ef66db..990eaa029d9c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -147,7 +147,7 @@ static const char *i915_cache_level_str(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = obj_to_i915(obj); - if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) { + if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 74))) { switch (obj->pat_index) { case 0: return " WB"; case 1: return " WT"; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 861567362abd..e81b3b2858ac 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -165,14 +165,6 @@ struct i915_gem_mm { struct notifier_block vmap_notifier; struct shrinker *shrinker; -#ifdef CONFIG_MMU_NOTIFIER - /** - * notifier_lock for mmu notifiers, memory may not be allocated - * while holding this lock. - */ - rwlock_t notifier_lock; -#endif - /* shrinker accounting, also useful for userland debugging */ u64 shrink_memory; u32 shrink_count; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 92758b6b41f0..1391c01d7663 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -48,7 +48,6 @@ #include "gem/i915_gem_object_frontbuffer.h" #include "gem/i915_gem_pm.h" #include "gem/i915_gem_region.h" -#include "gem/i915_gem_userptr.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -1165,10 +1164,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) RUNTIME_INFO(dev_priv)->page_sizes = I915_GTT_PAGE_SIZE_4K; - ret = i915_gem_init_userptr(dev_priv); - if (ret) - return ret; - for_each_gt(gt, dev_priv, i) { intel_uc_fetch_firmwares(>->uc); intel_wopcm_init(>->wopcm); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2d695818f006..bd9d812b1afa 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3225,7 +3225,7 @@ u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915) struct intel_gt *gt = to_gt(i915); /* Wa_18013179988 */ - if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { + if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { intel_wakeref_t wakeref; u32 reg, shift; diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index fa3e937ed3f5..3baa2f54a86e 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -551,6 +551,38 @@ static int query_hwconfig_blob(struct drm_i915_private *i915, return hwconfig->size; } +static int +query_guc_submission_version(struct drm_i915_private *i915, + struct drm_i915_query_item *query) +{ + struct drm_i915_query_guc_submission_version __user *query_ptr = + u64_to_user_ptr(query->data_ptr); + struct drm_i915_query_guc_submission_version ver; + struct intel_guc *guc = &to_gt(i915)->uc.guc; + const size_t size = sizeof(ver); + int ret; + + if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc)) + return -ENODEV; + + ret = copy_query_item(&ver, size, size, query); + if (ret != 0) + return ret; + + if (ver.branch || ver.major || ver.minor || ver.patch) + return -EINVAL; + + ver.branch = 0; + ver.major = guc->submission_version.major; + ver.minor = guc->submission_version.minor; + ver.patch = guc->submission_version.patch; + + if (copy_to_user(query_ptr, &ver, size)) + return -EFAULT; + + return 0; +} + static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { query_topology_info, @@ -559,6 +591,7 @@ static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, query_memregion_info, query_hwconfig_blob, query_geometry_subslices, + query_guc_submission_version, }; int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/i915_syncmap.c b/drivers/gpu/drm/i915/i915_syncmap.c index 60404dbb2e9f..df6437c37373 100644 --- a/drivers/gpu/drm/i915/i915_syncmap.c +++ b/drivers/gpu/drm/i915/i915_syncmap.c @@ -75,13 +75,10 @@ struct i915_syncmap { unsigned int height; unsigned int bitmap; struct i915_syncmap *parent; - /* - * Following this header is an array of either seqno or child pointers: - * union { - * u32 seqno[KSYNCMAP]; - * struct i915_syncmap *child[KSYNCMAP]; - * }; - */ + union { + DECLARE_FLEX_ARRAY(u32, seqno); + DECLARE_FLEX_ARRAY(struct i915_syncmap *, child); + }; }; /** @@ -99,13 +96,13 @@ void i915_syncmap_init(struct i915_syncmap **root) static inline u32 *__sync_seqno(struct i915_syncmap *p) { GEM_BUG_ON(p->height); - return (u32 *)(p + 1); + return p->seqno; } static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p) { GEM_BUG_ON(!p->height); - return (struct i915_syncmap **)(p + 1); + return p->child; } static inline unsigned int @@ -200,7 +197,7 @@ __sync_alloc_leaf(struct i915_syncmap *parent, u64 id) { struct i915_syncmap *p; - p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL); + p = kmalloc(struct_size(p, seqno, KSYNCMAP), GFP_KERNEL); if (unlikely(!p)) return NULL; @@ -282,7 +279,7 @@ static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno) unsigned int above; /* Insert a join above the current layer */ - next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next), + next = kzalloc(struct_size(next, child, KSYNCMAP), GFP_KERNEL); if (unlikely(!next)) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index dfefad5a5fec..76400e9c40f0 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1800,7 +1800,10 @@ static const struct intel_forcewake_range __mtl_fw_ranges[] = { GEN_FW_RANGE(0x24000, 0x2ffff, 0), /* 0x24000 - 0x2407f: always on 0x24080 - 0x2ffff: reserved */ - GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT) + GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT), + GEN_FW_RANGE(0x40000, 0x1901ef, 0), + GEN_FW_RANGE(0x1901f0, 0x1901f3, FORCEWAKE_GT) + /* FIXME: WA to wake GT while triggering H2G */ }; /* diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index fd4f9574d177..bd87386a8243 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -3013,6 +3013,7 @@ struct drm_i915_query_item { * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions) * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`) * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info) + * - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct drm_i915_query_guc_submission_version) */ __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 @@ -3021,6 +3022,7 @@ struct drm_i915_query_item { #define DRM_I915_QUERY_MEMORY_REGIONS 4 #define DRM_I915_QUERY_HWCONFIG_BLOB 5 #define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6 +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION 7 /* Must be kept compact -- no holes and well documented */ /** @@ -3567,6 +3569,16 @@ struct drm_i915_query_memory_regions { }; /** + * struct drm_i915_query_guc_submission_version - query GuC submission interface version + */ +struct drm_i915_query_guc_submission_version { + __u32 branch; + __u32 major; + __u32 minor; + __u32 patch; +}; + +/** * DOC: GuC HWCONFIG blob uAPI * * The GuC produces a blob with information about the current device. |