diff options
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml')
11 files changed, 393 insertions, 120 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ea7d60f9a9b4..6042a5a6a44f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -61,8 +61,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c index 50b0434354f8..0c4a8fe8e5ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c @@ -30,7 +30,7 @@ #include "dcn_calc_auto.h" #include "dal_asic_id.h" #include "resource.h" -#include "dcn10/dcn10_resource.h" +#include "resource/dcn10/dcn10_resource.h" #include "dcn10/dcn10_hubbub.h" #include "dml/dml1_display_rq_dlg_calc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h index 2cbdd75429ff..6e669a2c5b2d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h @@ -36,7 +36,7 @@ * Define the maximum amount of states supported by the ASIC. Every ASIC has a * specific number of states; this macro defines the maximum number of states. */ -#define DC__VOLTAGE_STATES 20 +#define DC__VOLTAGE_STATES 40 #define DC__NUM_DPP__4 1 #define DC__NUM_DPP__0_PRESENT 1 #define DC__NUM_DPP__1_PRESENT 1 diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 7fc8b18096ba..38ab9ad60ef8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -33,6 +33,7 @@ #include "link.h" #include "dcn20_fpu.h" +#include "dc_state_priv.h" #define DC_LOGGER \ dc->ctx->logger @@ -440,7 +441,115 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = { .use_urgent_burst_bw = 0 }; -struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 }; +struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 560.0, + .fabricclk_mhz = 560.0, + .dispclk_mhz = 513.0, + .dppclk_mhz = 513.0, + .phyclk_mhz = 540.0, + .socclk_mhz = 560.0, + .dscclk_mhz = 171.0, + .dram_speed_mts = 1069.0, + }, + { + .state = 1, + .dcfclk_mhz = 694.0, + .fabricclk_mhz = 694.0, + .dispclk_mhz = 642.0, + .dppclk_mhz = 642.0, + .phyclk_mhz = 600.0, + .socclk_mhz = 694.0, + .dscclk_mhz = 214.0, + .dram_speed_mts = 1324.0, + }, + { + .state = 2, + .dcfclk_mhz = 875.0, + .fabricclk_mhz = 875.0, + .dispclk_mhz = 734.0, + .dppclk_mhz = 734.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 875.0, + .dscclk_mhz = 245.0, + .dram_speed_mts = 1670.0, + }, + { + .state = 3, + .dcfclk_mhz = 1000.0, + .fabricclk_mhz = 1000.0, + .dispclk_mhz = 1100.0, + .dppclk_mhz = 1100.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1000.0, + .dscclk_mhz = 367.0, + .dram_speed_mts = 2000.0, + }, + { + .state = 4, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 2000.0, + }, + { + .state = 5, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 2000.0, + }, + }, + + .num_states = 5, + .sr_exit_time_us = 1.9, + .sr_enter_plus_exit_time_us = 4.4, + .urgent_latency_us = 3.0, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, + .max_avg_sdp_bw_use_normal_percent = 40.0, + .max_avg_dram_bw_use_normal_percent = 40.0, + .writeback_latency_us = 12.0, + .ideal_dram_bw_after_urgent_percent = 40.0, + .max_request_size_bytes = 256, + .dram_channel_width_bytes = 16, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .dcn_downspread_percent = 0.5, + .downspread_percent = 0.5, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 131, + .urgent_out_of_order_return_per_channel_bytes = 4096, + .channel_interleave_bytes = 256, + .num_banks = 8, + .num_chans = 16, + .vmm_page_size_bytes = 4096, + .dram_clock_change_latency_us = 45.0, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, + .dispclk_dppclk_vco_speed_mhz = 3850, + .xfc_bus_transport_time_us = 20, + .xfc_xbuf_latency_tolerance_us = 50, + .use_urgent_burst_bw = 0, +}; struct _vcs_dpi_ip_params_st dcn2_1_ip = { .odm_capable = 1, @@ -950,10 +1059,8 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc { int plane_count; int i; - unsigned int min_dst_y_next_start_us; plane_count = 0; - min_dst_y_next_start_us = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; @@ -975,26 +1082,15 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { struct dc_link *link = context->streams[0]->sink->link; struct dc_stream_status *stream_status = &context->stream_status[0]; - struct dc_stream_state *current_stream = context->streams[0]; int minmum_z8_residency = dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; bool is_pwrseq0 = link->link_index == 0; - bool isFreesyncVideo; - - isFreesyncVideo = current_stream->adjust.v_total_min == current_stream->adjust.v_total_max; - isFreesyncVideo = isFreesyncVideo && current_stream->timing.v_total < current_stream->adjust.v_total_min; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream == current_stream && isFreesyncVideo) { - min_dst_y_next_start_us = context->res_ctx.pipe_ctx[i].dlg_regs.min_dst_y_next_start_us; - break; - } - } /* Don't support multi-plane configurations */ if (stream_status->plane_count > 1) return DCN_ZSTATE_SUPPORT_DISALLOW; - if (is_pwrseq0 && (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || min_dst_y_next_start_us > 5000)) + if (is_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0) return DCN_ZSTATE_SUPPORT_ALLOW; else if (is_pwrseq0 && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr) return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; @@ -1087,7 +1183,7 @@ void dcn20_calculate_dlg_params(struct dc *dc, pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) { // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; context->res_ctx.pipe_ctx[i].unbounded_req = false; @@ -1437,7 +1533,7 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc, */ if (res_ctx->pipe_ctx[i].plane_state && (res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || - res_ctx->pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM)) + dc_state_get_pipe_subvp_type(context, &res_ctx->pipe_ctx[i]) == SUBVP_PHANTOM)) pipes[pipe_cnt].pipe.src.num_cursors = 0; else pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 3686f1e7de3a..63c48c29ba49 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -3542,7 +3542,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { struct vba_vars_st *v = &mode_lib->vba; int MinPrefetchMode, MaxPrefetchMode; - int i; + int i, start_state; unsigned int j, k, m; bool EnoughWritebackUnits = true; bool WritebackModeSupport = true; @@ -3553,6 +3553,11 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + if (mode_lib->validate_max_state) + start_state = v->soc.num_states - 1; + else + start_state = 0; + CalculateMinAndMaxPrefetchMode( mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &MinPrefetchMode, &MaxPrefetchMode); @@ -3851,7 +3856,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SingleDPPViewportSizeSupportPerPlane, &v->ViewportSizeSupport[0][0]); - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); @@ -4007,7 +4012,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*Total Available Pipes Support Check*/ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { v->TotalAvailablePipesSupport[i][j] = true; @@ -4046,7 +4051,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { v->RequiresDSC[i][k] = false; v->RequiresFEC[i][k] = false; @@ -4174,7 +4179,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } } - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { v->DIOSupport[i] = true; for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi) @@ -4185,7 +4190,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - for (i = 0; i < v->soc.num_states; ++i) { + for (i = start_state; i < v->soc.num_states; ++i) { v->ODMCombine4To1SupportCheckOK[i] = true; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 @@ -4197,7 +4202,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { v->NotEnoughDSCUnits[i] = false; v->TotalDSCUnitsRequired = 0.0; for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { @@ -4217,7 +4222,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /*DSC Delay per state*/ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { if (v->OutputBppPerState[i][k] == BPP_INVALID) { v->BPP = 0.0; @@ -4333,7 +4338,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; } - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; @@ -5075,7 +5080,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*PTE Buffer Size Check*/ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { v->PTEBufferSizeNotExceeded[i][j] = true; for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { @@ -5136,7 +5141,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /*Mode Support, Voltage State and SOC Configuration*/ - for (i = v->soc.num_states - 1; i >= 0; i--) { + for (i = v->soc.num_states - 1; i >= start_state; i--) { for (j = 0; j < 2; j++) { if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1 @@ -5158,7 +5163,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } { unsigned int MaximumMPCCombine = 0; - for (i = v->soc.num_states; i >= 0; i--) { + for (i = v->soc.num_states; i >= start_state; i--) { if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { v->VoltageLevel = i; v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9ec4172d1c2d..9f37f717a1f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -32,6 +32,7 @@ #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" #include "dcn30/dcn30_resource.h" #include "link.h" +#include "dc_state_priv.h" #define DC_LOGGER_INIT(logger) @@ -45,6 +46,14 @@ static const struct subvp_high_refresh_list subvp_high_refresh_list = { {.width = 1920, .height = 1080, }}, }; +static const struct subvp_active_margin_list subvp_active_margin_list = { + .min_refresh = 55, + .max_refresh = 65, + .res = { + {.width = 2560, .height = 1440, }, + {.width = 1920, .height = 1080, }}, +}; + struct _vcs_dpi_ip_params_st dcn3_2_ip = { .gpuvm_enable = 0, .gpuvm_max_page_table_levels = 4, @@ -333,7 +342,7 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, if (!pipe->stream) continue; - if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (pipe->plane_state && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); pipes[pipe_idx].pipe.dest.vupdate_offset = @@ -616,7 +625,7 @@ static bool dcn32_assign_subvp_pipe(struct dc *dc, if (pipe->plane_state && !pipe->top_pipe && !dcn32_is_center_timing(pipe) && !(pipe->stream->timing.pix_clk_100hz / 10000 > DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ) && (!dcn32_is_psr_capable(pipe) || (context->stream_count == 1 && dc->caps.dmub_caps.subvp_psr)) && - pipe->stream->mall_stream_config.type == SUBVP_NONE && + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE && (refresh_rate < 120 || dcn32_allow_subvp_high_refresh_rate(dc, context, pipe)) && !pipe->plane_state->address.tmz_surface && (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0 || @@ -674,7 +683,7 @@ static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context // Find the minimum pipe split count for non SubVP pipes if (resource_is_pipe_type(pipe, OPP_HEAD) && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE) { split_cnt = 0; while (pipe) { split_cnt++; @@ -727,8 +736,8 @@ static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context) * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. */ if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { - phantom = pipe->stream->mall_stream_config.paired_stream; + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + phantom = dc_state_get_paired_subvp_stream(context, pipe->stream); microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + phantom->timing.v_addressable; @@ -796,6 +805,9 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) int16_t stretched_drr_us = 0; int16_t drr_stretched_vblank_us = 0; int16_t max_vblank_mallregion = 0; + struct dc_stream_state *phantom_stream; + bool subvp_found = false; + bool drr_found = false; // Find SubVP pipe for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -808,8 +820,10 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) continue; // Find the SubVP pipe - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + subvp_found = true; break; + } } // Find the DRR pipe @@ -817,32 +831,37 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) drr_pipe = &context->res_ctx.pipe_ctx[i]; // We check for master pipe only - if (!resource_is_pipe_type(pipe, OTG_MASTER) || - !resource_is_pipe_type(pipe, DPP_PIPE)) + if (!resource_is_pipe_type(drr_pipe, OTG_MASTER) || + !resource_is_pipe_type(drr_pipe, DPP_PIPE)) continue; - if (drr_pipe->stream->mall_stream_config.type == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param && - (drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) + if (dc_state_get_pipe_subvp_type(context, drr_pipe) == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param && + (drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) { + drr_found = true; break; + } } - main_timing = &pipe->stream->timing; - phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing; - drr_timing = &drr_pipe->stream->timing; - prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us; - subvp_active_us = main_timing->v_addressable * main_timing->h_total / - (double)(main_timing->pix_clk_100hz * 100) * 1000000; - drr_frame_us = drr_timing->v_total * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000; - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; - stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); - max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; + if (subvp_found && drr_found) { + phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream); + main_timing = &pipe->stream->timing; + phantom_timing = &phantom_stream->timing; + drr_timing = &drr_pipe->stream->timing; + prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + + dc->caps.subvp_prefetch_end_to_mall_start_us; + subvp_active_us = main_timing->v_addressable * main_timing->h_total / + (double)(main_timing->pix_clk_100hz * 100) * 1000000; + drr_frame_us = drr_timing->v_total * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000; + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; + stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; + drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); + max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; + } /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis @@ -887,6 +906,8 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) struct dc_crtc_timing *main_timing = NULL; struct dc_crtc_timing *phantom_timing = NULL; struct dc_crtc_timing *vblank_timing = NULL; + struct dc_stream_state *phantom_stream; + enum mall_stream_type pipe_mall_type; /* For SubVP + VBLANK/DRR cases, we assume there can only be * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK @@ -896,6 +917,7 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) */ for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; + pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); // We check for master pipe, but it shouldn't matter since we only need // the pipe for timing info (stream should be same for any pipe splits) @@ -903,18 +925,19 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) !resource_is_pipe_type(pipe, DPP_PIPE)) continue; - if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (!found && pipe_mall_type == SUBVP_NONE) { // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). vblank_index = i; found = true; } - if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) subvp_pipe = pipe; } if (found) { + phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream); main_timing = &subvp_pipe->stream->timing; - phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + phantom_timing = &phantom_stream->timing; vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe // Also include the prefetch end to mallstart delay time @@ -969,7 +992,7 @@ static bool subvp_subvp_admissable(struct dc *dc, continue; if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); @@ -1018,23 +1041,23 @@ static bool subvp_validate_static_schedulability(struct dc *dc, for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); if (!pipe->stream) continue; if (pipe->plane_state && !pipe->top_pipe) { - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (pipe_mall_type == SUBVP_MAIN) subvp_count++; - if (pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (pipe_mall_type == SUBVP_NONE) non_subvp_pipes++; - } } // Count how many planes that aren't SubVP/phantom are capable of VACTIVE // switching (SubVP + VACTIVE unsupported). In situations where we force // SubVP for a VACTIVE plane, we don't want to increment the vactive_count. if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vlevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] > 0 && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + pipe_mall_type == SUBVP_NONE) { vactive_count++; } pipe_idx++; @@ -1070,7 +1093,7 @@ static void assign_subvp_index(struct dc *dc, struct dc_state *context) struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && - pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) { + dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { pipe_ctx->subvp_index = index++; } else { pipe_ctx->subvp_index = 0; @@ -1192,13 +1215,16 @@ static bool update_pipe_slice_table_with_split_flags( */ struct pipe_ctx *pipe; bool odm; - int i; + int dc_pipe_idx, dml_pipe_idx = 0; bool updated = false; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; + for (dc_pipe_idx = 0; + dc_pipe_idx < dc->res_pool->pipe_count; dc_pipe_idx++) { + pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx]; + if (resource_is_pipe_type(pipe, FREE_PIPE)) + continue; - if (merge[i]) { + if (merge[dc_pipe_idx]) { if (resource_is_pipe_type(pipe, OPP_HEAD)) /* merging OPP head means reducing ODM slice * count by 1 @@ -1213,17 +1239,18 @@ static bool update_pipe_slice_table_with_split_flags( updated = true; } - if (split[i]) { - odm = vba->ODMCombineEnabled[vba->pipe_plane[i]] != + if (split[dc_pipe_idx]) { + odm = vba->ODMCombineEnabled[vba->pipe_plane[dml_pipe_idx]] != dm_odm_combine_mode_disabled; if (odm && resource_is_pipe_type(pipe, OPP_HEAD)) update_slice_table_for_stream( - table, pipe->stream, split[i] - 1); + table, pipe->stream, split[dc_pipe_idx] - 1); else if (!odm && resource_is_pipe_type(pipe, DPP_PIPE)) update_slice_table_for_plane(table, pipe, - pipe->plane_state, split[i] - 1); + pipe->plane_state, split[dc_pipe_idx] - 1); updated = true; } + dml_pipe_idx++; } return updated; } @@ -1233,15 +1260,11 @@ static void update_pipes_with_slice_table(struct dc *dc, struct dc_state *contex { int i; - for (i = 0; i < table->odm_combine_count; i++) { + for (i = 0; i < table->odm_combine_count; i++) resource_update_pipes_for_stream_with_slice_count(context, dc->current_state, dc->res_pool, table->odm_combines[i].stream, table->odm_combines[i].slice_count); - /* TODO: move this into the function above */ - dcn20_build_mapped_resource(dc, context, - table->odm_combines[i].stream); - } for (i = 0; i < table->mpc_combine_count; i++) resource_update_pipes_for_plane_with_slice_count(context, @@ -1408,6 +1431,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, unsigned int dc_pipe_idx = 0; int i = 0; bool found_supported_config = false; + int vlevel_temp = 0; dc_assert_fp_enabled(); @@ -1440,13 +1464,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, */ if (!dc->debug.force_disable_subvp && !dc->caps.dmub_caps.gecc_enable && dcn32_all_pipes_have_stream_and_plane(dc, context) && !dcn32_mpo_in_use(context) && !dcn32_any_surfaces_rotated(dc, context) && !is_test_pattern_enabled(context) && - (*vlevel == context->bw_ctx.dml.soc.num_states || + (*vlevel == context->bw_ctx.dml.soc.num_states || (vba->DRAMSpeedPerState[*vlevel] != vba->DRAMSpeedPerState[0] && + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) || vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || dc->debug.force_subvp_mclk_switch)) { dcn32_merge_pipes_for_subvp(dc, context); memset(merge, 0, MAX_PIPES * sizeof(bool)); + vlevel_temp = *vlevel; /* to re-initialize viewport after the pipe merge */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; @@ -1515,10 +1541,14 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, } } + if (vba->DRAMSpeedPerState[*vlevel] >= vba->DRAMSpeedPerState[vlevel_temp]) + found_supported_config = false; + // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) // remove phantom pipes and repopulate dml pipes if (!found_supported_config) { - dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); + dc_state_remove_phantom_streams_and_planes(dc, context); + dc_state_release_phantom_streams_and_planes(dc, context); vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); @@ -1670,7 +1700,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) { // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; context->res_ctx.pipe_ctx[i].unbounded_req = false; @@ -1702,7 +1732,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, context->res_ctx.pipe_ctx[i].plane_state != context->res_ctx.pipe_ctx[i].top_pipe->plane_state) && context->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { /* SS: all active surfaces stored in MALL */ - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) != SUBVP_PHANTOM) { context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes; if (context->res_ctx.pipe_ctx[i].stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) { @@ -1916,7 +1946,8 @@ bool dcn32_internal_validate_bw(struct dc *dc, return false; // For each full update, remove all existing phantom pipes first - dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate); + dc_state_remove_phantom_streams_and_planes(dc, context); + dc_state_release_phantom_streams_and_planes(dc, context); dc->res_pool->funcs->update_soc_for_wm_a(dc, context); @@ -2178,6 +2209,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, int i; pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes); /* repopulate_pipes = 1 means the pipes were either split or merged. In this case * we have to re-calculate the DET allocation and run through DML once more to @@ -2186,7 +2218,9 @@ bool dcn32_internal_validate_bw(struct dc *dc, * */ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = dm_prefetch_support_uclk_fclk_and_stutter_if_possible; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + if (vlevel == context->bw_ctx.dml.soc.num_states) { /* failed after DET size changes */ goto validate_fail; @@ -2231,6 +2265,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, int i, pipe_idx, vlevel_temp = 0; double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + double dram_speed_from_validation = context->bw_ctx.dml.vba.DRAMSpeed; double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation; bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_unsupported; @@ -2418,7 +2453,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, } if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + min_dram_speed_mts = dram_speed_from_validation; min_dram_speed_mts_margin = 160; context->bw_ctx.dml.soc.dram_clock_change_latency_us = @@ -3294,25 +3329,24 @@ bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe) { bool allow = false; uint32_t refresh_rate = 0; + uint32_t min_refresh = subvp_active_margin_list.min_refresh; + uint32_t max_refresh = subvp_active_margin_list.max_refresh; + uint32_t i; - /* Allow subvp on displays that have active margin for 2560x1440@60hz displays - * only for now. There must be no scaling as well. - * - * For now we only enable on 2560x1440@60hz displays to enable 4K60 + 1440p60 configs - * for p-state switching. - */ - if (pipe->stream && pipe->plane_state) { - refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1) - / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total); - if (pipe->stream->timing.v_addressable == 1440 && - pipe->stream->timing.h_addressable == 2560 && - refresh_rate >= 55 && refresh_rate <= 65 && - pipe->plane_state->src_rect.height == 1440 && - pipe->plane_state->src_rect.width == 2560 && - pipe->plane_state->dst_rect.height == 1440 && - pipe->plane_state->dst_rect.width == 2560) + for (i = 0; i < SUBVP_ACTIVE_MARGIN_LIST_LEN; i++) { + uint32_t width = subvp_active_margin_list.res[i].width; + uint32_t height = subvp_active_margin_list.res[i].height; + + refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + + pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); + refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); + + if (refresh_rate >= min_refresh && refresh_rate <= max_refresh && + dcn32_check_native_scaling_for_res(pipe, width, height)) { allow = true; + break; + } } return allow; } @@ -3431,7 +3465,15 @@ void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *co for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->stream) + /* In DCN32/321, FPO uses per-pipe P-State force. + * If there's no planes, HUBP is power gated and + * therefore programming UCLK_PSTATE_FORCE does + * nothing (P-State will always be asserted naturally + * on a pipe that has HUBP power gated. Therefore we + * only want to enable FPO if the FPO pipe has both + * a stream and a plane. + */ + if (!pipe->stream || !pipe->plane_state) continue; if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index cbdfb762c10c..6c84b0fa40f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -813,6 +813,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman (v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ? mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false, + /* Output */ &v->DSTXAfterScaler[k], &v->DSTYAfterScaler[k], @@ -3317,6 +3319,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SwathHeightCThisState[k], v->TWait, (v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ? mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + mode_lib->vba.PrefetchModePerState[i][j] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false, /* Output */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index d940dfa5ae43..80fccd4999a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -3423,6 +3423,7 @@ bool dml32_CalculatePrefetchSchedule( unsigned int SwathHeightC, double TWait, double TPreReq, + bool ExtendPrefetchIfPossible, /* Output */ double *DSTXAfterScaler, double *DSTYAfterScaler, @@ -3892,12 +3893,32 @@ bool dml32_CalculatePrefetchSchedule( /* Clamp to oto for bandwidth calculation */ LinesForPrefetchBandwidth = dst_y_prefetch_oto; } else { - *DestinationLinesForPrefetch = dst_y_prefetch_equ; - TimeForFetchingMetaPTE = Tvm_equ; - TimeForFetchingRowInVBlank = Tr0_equ; - *PrefetchBandwidth = prefetch_bw_equ; - /* Clamp to equ for bandwidth calculation */ - LinesForPrefetchBandwidth = dst_y_prefetch_equ; + /* For mode programming we want to extend the prefetch as much as possible + * (up to oto, or as long as we can for equ) if we're not already applying + * the 60us prefetch requirement. This is to avoid intermittent underflow + * issues during prefetch. + * + * The prefetch extension is applied under the following scenarios: + * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank) + * 2. We're using subvp or drr methods of p-state switch, in which case we + * we don't care if prefetch takes up more of the blanking time + * + * Mode programming typically chooses the smallest prefetch time possible + * (i.e. highest bandwidth during prefetch) presumably to create margin between + * p-states / c-states that happen in vblank and prefetch. Therefore we only + * apply this prefetch extension when p-state in vblank is not required (UCLK + * p-states take up the most vblank time). + */ + if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) { + MyError = true; + } else { + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + TimeForFetchingMetaPTE = Tvm_equ; + TimeForFetchingRowInVBlank = Tr0_equ; + *PrefetchBandwidth = prefetch_bw_equ; + /* Clamp to equ for bandwidth calculation */ + LinesForPrefetchBandwidth = dst_y_prefetch_equ; + } } *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 592d174df6c6..5d34735df83d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -747,6 +747,7 @@ bool dml32_CalculatePrefetchSchedule( unsigned int SwathHeightC, double TWait, double TPreReq, + bool ExtendPrefetchIfPossible, /* Output */ double *DSTXAfterScaler, double *DSTYAfterScaler, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index a5fe523668e9..475c4ec43c01 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -124,7 +124,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 600.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 1, @@ -133,7 +133,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 2, @@ -142,7 +142,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 3, @@ -151,7 +151,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 4, @@ -160,15 +160,15 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, }, .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 50.0, /*changed from 442.0*/ - .sr_enter_plus_exit_z8_time_us = 50.0,/*changed from 560.0*/ - .fclk_change_latency_us = 20.0, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_z8_time_us = 210.0, + .sr_enter_plus_exit_z8_time_us = 320.0, + .fclk_change_latency_us = 24.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, @@ -326,9 +326,74 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, dcn3_5_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000.0; } + + if (dc->bb_overrides.dram_clock_change_latency_ns > 0) + dcn3_5_soc.dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + + if (dc->bb_overrides.sr_exit_time_ns > 0) + dcn3_5_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_time_ns > 0) + dcn3_5_soc.sr_enter_plus_exit_time_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + + if (dc->bb_overrides.sr_exit_z8_time_ns > 0) + dcn3_5_soc.sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns > 0) + dcn3_5_soc.sr_enter_plus_exit_z8_time_us = + dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0; + /*temp till dml2 fully work without dml1*/ dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31); + + /*copy to dml2, before dml2_create*/ + if (clk_table->num_entries > 2) { + + for (i = 0; i < clk_table->num_entries; i++) { + dc->dml2_options.bbox_overrides.clks_table.num_states = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz = + clock_limits[i].dcfclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz = + clock_limits[i].fabricclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz = + clock_limits[i].dispclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz = + clock_limits[i].dppclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz = + clock_limits[i].socclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = + clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = + clock_limits[i].dtbclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels = + clk_table->num_entries; + } + } + + /* Update latency values */ + dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us; + + dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_5_soc.sr_exit_time_us; + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_5_soc.sr_enter_plus_exit_time_us; + + dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_5_soc.sr_exit_z8_time_us; + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_5_soc.sr_enter_plus_exit_z8_time_us; } static bool is_dual_plane(enum surface_pixel_format format) @@ -507,3 +572,37 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, return pipe_cnt; } + +void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context) +{ + enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW; + unsigned int i, plane_count = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].plane_state) + plane_count++; + } + + if (plane_count == 0) { + support = DCN_ZSTATE_SUPPORT_ALLOW; + } else if (plane_count == 1 && context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { + struct dc_link *link = context->streams[0]->sink->link; + bool is_pwrseq0 = link && link->link_index == 0; + bool is_psr1 = link && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr; + int minmum_z8_residency = + dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; + bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; + int minmum_z10_residency = + dc->debug.minimum_z10_residency_time > 0 ? dc->debug.minimum_z10_residency_time : 5000; + bool allow_z10 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z10_residency; + + if (is_pwrseq0 && allow_z10) + support = DCN_ZSTATE_SUPPORT_ALLOW; + else if (is_pwrseq0 && is_psr1) + support = allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; + else if (allow_z8) + support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY; + } + + context->bw_ctx.bw.dcn.clk.zstate_support = support; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h index e8d5a170893e..067480fc3691 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h @@ -39,4 +39,6 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, display_e2e_pipe_params_st *pipes, bool fast_validate); +void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context); + #endif |