summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2022-11-22 13:41:11 +1000
committerDave Airlie <airlied@redhat.com>2022-11-22 13:41:11 +1000
commitfc58764bbf602b65a6f63c53e5fd6feae76c510c (patch)
tree03f5448cf5d742b8fd2980e86a33636026557ac6 /drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
parent819683a1fc2f7e64017d50caf539e7bafcb37b81 (diff)
parentaec3bb3a01de09058fbebed4821ed7d07e1ed994 (diff)
downloadlinux-fc58764bbf602b65a6f63c53e5fd6feae76c510c.tar.gz
linux-fc58764bbf602b65a6f63c53e5fd6feae76c510c.tar.bz2
linux-fc58764bbf602b65a6f63c53e5fd6feae76c510c.zip
Merge tag 'amd-drm-next-6.2-2022-11-18' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.2-2022-11-18: amdgpu: - SR-IOV fixes - Clean up DC checks - DCN 3.2.x fixes - DCN 3.1.x fixes - Don't enable degamma on asics which don't support it - IP discovery fixes - BACO fixes - Fix vbios allocation handling when vkms is enabled - Drop buggy tdr advanced mode GPU reset handling - Fix the build when DCN is not set in kconfig - MST DSC fixes - Userptr fixes - FRU and RAS EEPROM fixes - VCN 4.x RAS support - Aldrebaran CU occupancy reporting fix - PSP ring cleanup amdkfd: - Memory limit fix - Enable cooperative launch on gfx 10.3 amd-drm-next-6.2-2022-11-11: amdgpu: - SMU 13.x updates - GPUVM TLB race fix - DCN 3.1.4 updates - DCN 3.2.x updates - PSR fixes - Kerneldoc fix - Vega10 fan fix - GPUVM locking fixes in error pathes - BACO fix for Beige Goby - EEPROM I2C address cleanup - GFXOFF fix - Fix DC memory leak in error pathes - Flexible array updates - Mtype fix for GPUVM PTEs - Move Kconfig into amdgpu directory - SR-IOV updates - Fix possible memory leak in CS IOCTL error path amdkfd: - Fix possible memory overrun - CRIU fixes radeon: - ACPI ref count fix - HDA audio notifier support - Move Kconfig into radeon directory UAPI: - Add new GEM_CREATE flags to help to transition more KFD functionality to the DRM UAPI. These are used internally in the driver to align location based memory coherency requirements from memory allocated in the KFD with how we manage GPUVM PTEs. They are currently blocked in the GEM_CREATE IOCTL as we don't have a user right now. They are just used internally in the kernel driver for now for existing KFD memory allocations. So a change to the UAPI header, but no functional change in the UAPI. From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20221118170807.6505-1-alexander.deucher@amd.com Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c')
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c66
1 files changed, 63 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 0d704e302d03..97b333b230d1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -531,9 +531,12 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
unsigned int i, pipe_idx;
struct pipe_ctx *pipe;
uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines;
+ unsigned int num_dpp;
unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel;
unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel];
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ struct dc_stream_state *main_stream = ref_pipe->stream;
dc_assert_fp_enabled();
@@ -569,13 +572,23 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) +
pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines;
+ // W/A for DCC corruption with certain high resolution timings.
+ // Determing if pipesplit is used. If so, add meta_row_height to the phantom vactive.
+ num_dpp = vba->NoOfDPP[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]];
+ phantom_vactive += num_dpp > 1 ? vba->meta_row_height[vba->pipe_plane[pipe_idx]] : 0;
+
// For backporch of phantom pipe, use vstartup of the main pipe
phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
phantom_stream->dst.y = 0;
phantom_stream->dst.height = phantom_vactive;
+ /* When scaling, DML provides the end to end required number of lines for MALL.
+ * dst.height is always correct for this case, but src.height is not which causes a
+ * delta between main and phantom pipe scaling outputs. Need to adjust src.height on
+ * phantom for this case.
+ */
phantom_stream->src.y = 0;
- phantom_stream->src.height = phantom_vactive;
+ phantom_stream->src.height = (double)phantom_vactive * (double)main_stream->src.height / (double)main_stream->dst.height;
phantom_stream->timing.v_addressable = phantom_vactive;
phantom_stream->timing.v_front_porch = 1;
@@ -1228,7 +1241,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt, int vlevel)
{
- int i, pipe_idx;
+ int i, pipe_idx, active_hubp_count = 0;
bool usr_retraining_support = false;
bool unbounded_req_enabled = false;
@@ -1273,6 +1286,8 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
if (!context->res_ctx.pipe_ctx[i].stream)
continue;
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ active_hubp_count++;
pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt,
pipe_idx);
pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
@@ -1298,6 +1313,16 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
pipe_idx++;
}
+ /* If DCN isn't making memory requests we can allow pstate change and lower clocks */
+ if (!active_hubp_count) {
+ context->bw_ctx.bw.dcn.clk.socclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dramclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
+ }
/*save a original dppclock copy*/
context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
@@ -1729,6 +1754,9 @@ bool dcn32_internal_validate_bw(struct dc *dc,
}
if (repopulate_pipes) {
+ int flag_max_mpc_comb = vba->maxMpcComb;
+ int flag_vlevel = vlevel;
+
pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
/* repopulate_pipes = 1 means the pipes were either split or merged. In this case
@@ -1742,6 +1770,22 @@ bool dcn32_internal_validate_bw(struct dc *dc,
if (vlevel == context->bw_ctx.dml.soc.num_states) {
/* failed after DET size changes */
goto validate_fail;
+ } else if (flag_max_mpc_comb == 0 &&
+ flag_max_mpc_comb != context->bw_ctx.dml.vba.maxMpcComb) {
+ /* check the context constructed with pipe split flags is still valid*/
+ bool flags_valid = false;
+ for (int i = flag_vlevel; i < context->bw_ctx.dml.soc.num_states; i++) {
+ if (vba->ModeSupport[i][flag_max_mpc_comb]) {
+ vba->maxMpcComb = flag_max_mpc_comb;
+ vba->VoltageLevel = i;
+ vlevel = i;
+ flags_valid = true;
+ }
+ }
+
+ /* this should never happen */
+ if (!flags_valid)
+ goto validate_fail;
}
}
*vlevel_out = vlevel;
@@ -1800,6 +1844,12 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
*/
context->bw_ctx.dml.soc.dram_clock_change_latency_us =
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+ /* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so
+ * prefetch is scheduled correctly to account for dummy pstate.
+ */
+ if (dummy_latency_index == 0)
+ context->bw_ctx.dml.soc.fclk_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
dcfclk_from_fw_based_mclk_switching = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
@@ -1987,6 +2037,10 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && dummy_latency_index == 0)
+ context->bw_ctx.dml.soc.fclk_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+
dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
if (!pstate_en)
@@ -1994,8 +2048,12 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
context->bw_ctx.dml.soc.dram_clock_change_latency_us =
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
- if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching)
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context);
+ if (dummy_latency_index == 0)
+ context->bw_ctx.dml.soc.fclk_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us;
+ }
}
static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
@@ -2351,6 +2409,8 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
/* DML DSC delay factor workaround */
dcn3_2_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0;
+ dcn3_2_ip.min_prefetch_in_strobe_us = dc->debug.min_prefetch_in_strobe_ns / 1000.0;
+
/* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;