diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-19 16:24:24 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-19 16:24:24 -0700 |
commit | 574cc4539762561d96b456dbc0544d8898bd4c6e (patch) | |
tree | 07d84db8cf9fd30cbde6f539ce3a3f6116593e41 /drivers/gpu/drm/amd/display/dc/dml | |
parent | 3c2edc36a77420d8be05d656019dbc8c31535992 (diff) | |
parent | 945b584c94f8c665b2df3834a8a6a8faf256cd5f (diff) | |
download | linux-574cc4539762561d96b456dbc0544d8898bd4c6e.tar.gz linux-574cc4539762561d96b456dbc0544d8898bd4c6e.tar.bz2 linux-574cc4539762561d96b456dbc0544d8898bd4c6e.zip |
Merge tag 'drm-next-2019-09-18' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"This is the main pull request for 5.4-rc1 merge window. I don't think
there is anything outstanding so next week should just be fixes, but
we'll see if I missed anything. I landed some fixes earlier in the
week but got delayed writing summary and sending it out, due to a mix
of sick kid and jetlag!
There are some fixes pending, but I'd rather get the main merge out of
the way instead of delaying it longer.
It's also pretty large in commit count and new amd header file size.
The largest thing is four new amdgpu products (navi12/14, arcturus and
renoir APU support).
Otherwise it's pretty much lots of work across the board, i915 has
started landing tigerlake support, lots of icelake fixes and lots of
locking reworking for future gpu support, lots of header file rework
(drmP.h is nearly gone), some old legacy hacks (DRM_WAIT_ON) have been
put into the places they are needed.
uapi:
- content protection type property for HDCP
core:
- rework include dependencies
- lots of drmP.h removals
- link rate calculation robustness fix
- make fb helper map only when required
- add connector->DDC adapter link
- DRM_WAIT_ON removed
- drop DRM_AUTH usage from drivers
dma-buf:
- reservation object fence helper
dma-fence:
- shrink dma_fence struct
- merge signal functions
- store timestamps in dma_fence
- selftests
ttm:
- embed drm_get_object struct into ttm_buffer_object
- release_notify callback
bridges:
- sii902x - audio graph card support
- tc358767 - aux data handling rework
- ti-snd64dsi86 - debugfs support, DSI mode flags support
panels:
- Support for GiantPlus GPM940B0, Sharp LQ070Y3DG3B, Ortustech
COM37H3M, Novatek NT39016, Sharp LS020B1DD01D, Raydium RM67191, Boe
Himax8279d, Sharp LD-D5116Z01B
- TI nspire, NEC NL8048HL11, LG Philips LB035Q02, Sharp LS037V7DW01,
Sony ACX565AKM, Toppoly TD028TTEC1 Toppoly TD043MTEA1
i915:
- Initial tigerlake platform support
- Locking simplification work, general all over refactoring.
- Selftests
- HDCP debug info improvements
- DSI properties
- Icelake display PLL fixes, colorspace fixes, bandwidth fixes, DSI
suspend/resume
- GuC fixes
- Perf fixes
- ElkhartLake enablement
- DP MST fixes
- GVT - command parser enhancements
amdgpu:
- add wipe memory on release flag for buffer creation
- Navi12/14 support (may be marked experimental)
- Arcturus support
- Renoir APU support
- mclk DPM for Navi
- DC display fixes
- Raven scatter/gather support
- RAS support for GFX
- Navi12 + Arcturus power features
- GPU reset for Picasso
- smu11 i2c controller support
amdkfd:
- navi12/14 support
- Arcturus support
radeon:
- kexec fix
nouveau:
- improved display color management
- detect lack of GPU power cables
vmwgfx:
- evicition priority support
- remove unused security feature
msm:
- msm8998 display support
- better async commit support for cursor updates
etnaviv:
- per-process address space support
- performance counter fixes
- softpin support
mcde:
- DCS transfers fix
exynos:
- drmP.h cleanup
lima:
- reduce logging
kirin:
- misc clenaups
komeda:
- dual-link support
- DT memory regions
hisilicon:
- misc fixes
imx:
- IPUv3 image converter fixes
- 32-bit RGB V4L2 pixel format support
ingenic:
- more support for panel related cases
mgag200:
- cursor support fix
panfrost:
- export GPU features register to userspace
- gpu heap allocations
- per-fd address space support
pl111:
- CLD pads wiring support removed from DT
rockchip:
- rework to use DRM PSR helpers
- fix bug in VOP_WIN_GET macro
- DSI DT binding rework
sun4i:
- improve support for color encoding and range
- DDC enabled GPIO
tinydrm:
- rework SPI support
- improve MIPI-DBI support
- moved to drm/tiny
vkms:
- rework CRC tracking
dw-hdmi:
- get_eld and i2s improvements
gm12u320:
- misc fixes
meson:
- global code cleanup
- vpu feature detect
omap:
- alpha/pixel blend mode properties
rcar-du:
- misc fixes"
* tag 'drm-next-2019-09-18' of git://anongit.freedesktop.org/drm/drm: (2112 commits)
drm/nouveau/bar/gm20b: Avoid BAR1 teardown during init
drm/nouveau: Fix ordering between TTM and GEM release
drm/nouveau/prime: Extend DMA reservation object lock
drm/nouveau: Fix fallout from reservation object rework
drm/nouveau/kms/nv50-: Don't create MSTMs for eDP connectors
drm/i915: Use NOEVICT for first pass on attemping to pin a GGTT mmap
drm/i915: to make vgpu ppgtt notificaiton as atomic operation
drm/i915: Flush the existing fence before GGTT read/write
drm/i915: Hold irq-off for the entire fake lock period
drm/i915/gvt: update RING_START reg of vGPU when the context is submitted to i915
drm/i915/gvt: update vgpu workload head pointer correctly
drm/mcde: Fix DSI transfers
drm/msm: Use the correct dma_sync calls harder
drm/msm: remove unlikely() from WARN_ON() conditions
drm/msm/dsi: Fix return value check for clk_get_parent
drm/msm: add atomic traces
drm/msm/dpu: async commit support
drm/msm: async commit support
drm/msm: split power control from prepare/complete_commit
drm/msm: add kms->flush_commit()
...
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml')
15 files changed, 15101 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 0bb7a20675c4..af2a864a6da0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -32,12 +32,22 @@ endif dml_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_CLANG +dml_ccflags += -msse2 +endif + CFLAGS_display_mode_lib.o := $(dml_ccflags) ifdef CONFIG_DRM_AMD_DC_DCN2_0 CFLAGS_display_mode_vba.o := $(dml_ccflags) CFLAGS_display_mode_vba_20.o := $(dml_ccflags) CFLAGS_display_rq_dlg_calc_20.o := $(dml_ccflags) +CFLAGS_display_mode_vba_20v2.o := $(dml_ccflags) +CFLAGS_display_rq_dlg_calc_20v2.o := $(dml_ccflags) +endif +ifdef CONFIG_DRM_AMD_DC_DCN2_1 +CFLAGS_display_mode_vba_21.o := $(dml_ccflags) +CFLAGS_display_rq_dlg_calc_21.o := $(dml_ccflags) endif ifdef CONFIG_DRM_AMD_DCN3AG CFLAGS_display_mode_vba_3ag.o := $(dml_ccflags) @@ -51,7 +61,12 @@ DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \ ifdef CONFIG_DRM_AMD_DC_DCN2_0 DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o +DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o endif +ifdef CONFIG_DRM_AMD_DC_DCN2_1 +DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o +endif + AMD_DAL_DML = $(addprefix $(AMDDALPATH)/dc/dml/,$(DML)) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c new file mode 100644 index 000000000000..0fafd693ffb4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -0,0 +1,5136 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "../display_mode_lib.h" +#include "display_mode_vba_20v2.h" +#include "../dml_inline_defs.h" + +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +#define BPP_INVALID 0 +#define BPP_BLENDED_PIPE 0xffffffff + +static double adjust_ReturnBW( + struct display_mode_lib *mode_lib, + double ReturnBW, + bool DCCEnabledAnyPlane, + double ReturnBandwidthToDCN); +static unsigned int dscceComputeDelay( + unsigned int bpc, + double bpp, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat); +static unsigned int dscComputeDelay(enum output_format_class pixelFormat); +static bool CalculateDelayAfterScaler( + struct display_mode_lib *mode_lib, + double ReturnBW, + double ReadBandwidthPlaneLuma, + double ReadBandwidthPlaneChroma, + double TotalDataReadBandwidth, + double DisplayPipeLineDeliveryTimeLuma, + double DisplayPipeLineDeliveryTimeChroma, + double DPPCLK, + double DISPCLK, + double PixelClock, + unsigned int DSCDelay, + unsigned int DPPPerPlane, + bool ScalerEnabled, + unsigned int NumberOfCursors, + double DPPCLKDelaySubtotal, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCFormater, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int ScalerRecoutWidth, + enum output_format_class OutputFormat, + unsigned int HTotal, + unsigned int SwathWidthSingleDPPY, + double BytePerPixelDETY, + double BytePerPixelDETC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + double *DSTXAfterScaler, + double *DSTYAfterScaler + ); +// Super monster function with some 45 argument +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double DPPCLK, + double DISPCLK, + double PixelClock, + double DCFCLKDeepSleep, + unsigned int DPPPerPlane, + unsigned int NumberOfCursors, + unsigned int VBlank, + unsigned int HTotal, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int PageTableLevels, + bool GPUVMEnable, + bool DynamicMetadataEnable, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + bool DCCEnable, + double UrgentLatencyPixelDataOnly, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double BytePerPixelDETY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + double BytePerPixelDETC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + bool XFCEnabled, + double XFCRemoteSurfaceFlipDelay, + bool InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + double DSTXAfterScaler, + double DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBW, + double *Tno_bw, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix); +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath); +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + unsigned int SwathWidthY, + bool GPUVMEnable, + unsigned int VMMPageSize, + unsigned int PTEBufferSizeInRequestsLuma, + unsigned int PDEProcessingBufIn64KBReqs, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + unsigned int *dpte_row_height, + unsigned int *meta_row_height); +static double CalculateTWait( + unsigned int PrefetchMode, + double DRAMClockChangeLatency, + double UrgentLatencyPixelDataOnly, + double SREnterPlusExitTime); +static double CalculateRemoteSurfaceFlipDelay( + struct display_mode_lib *mode_lib, + double VRatio, + double SwathWidth, + double Bpp, + double LineTime, + double XFCTSlvVupdateOffset, + double XFCTSlvVupdateWidth, + double XFCTSlvVreadyOffset, + double XFCXBUFLatencyTolerance, + double XFCFillBWOverhead, + double XFCSlvChunkSize, + double XFCBusTransportTime, + double TCalc, + double TWait, + double *SrcActiveDrainRate, + double *TInitXFill, + double *TslvChk); +static void CalculateActiveRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw, + double *qual_row_bw); +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + double UrgentExtraLatency, + double UrgentLatencyPixelDataOnly, + unsigned int GPUVMMaxPageTableLevels, + bool GPUVMEnable, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + unsigned int ImmediateFlipBytes, + double LineTime, + double VRatio, + double Tno_bw, + double PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + double qual_row_bw, + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe); +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackLumaHTaps, + unsigned int WritebackLumaVTaps, + unsigned int WritebackChromaHTaps, + unsigned int WritebackChromaVTaps, + unsigned int WritebackDestinationWidth); + +static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib); +static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib); + +void dml20v2_recalculate(struct display_mode_lib *mode_lib) +{ + ModeSupportAndSystemConfiguration(mode_lib); + mode_lib->vba.FabricAndDRAMBandwidth = dml_min( + mode_lib->vba.DRAMSpeed * mode_lib->vba.NumberOfChannels * mode_lib->vba.DRAMChannelWidth, + mode_lib->vba.FabricClock * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000.0; + PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); + dml20v2_DisplayPipeConfiguration(mode_lib); + dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); +} + +static double adjust_ReturnBW( + struct display_mode_lib *mode_lib, + double ReturnBW, + bool DCCEnabledAnyPlane, + double ReturnBandwidthToDCN) +{ + double CriticalCompression; + + if (DCCEnabledAnyPlane + && ReturnBandwidthToDCN + > mode_lib->vba.DCFCLK * mode_lib->vba.ReturnBusWidth / 4.0) + ReturnBW = + dml_min( + ReturnBW, + ReturnBandwidthToDCN * 4 + * (1.0 + - mode_lib->vba.UrgentLatencyPixelDataOnly + / ((mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024 + / ReturnBandwidthToDCN + - mode_lib->vba.DCFCLK + * mode_lib->vba.ReturnBusWidth + / 4) + + mode_lib->vba.UrgentLatencyPixelDataOnly)); + + CriticalCompression = 2.0 * mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK + * mode_lib->vba.UrgentLatencyPixelDataOnly + / (ReturnBandwidthToDCN * mode_lib->vba.UrgentLatencyPixelDataOnly + + (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024); + + if (DCCEnabledAnyPlane && CriticalCompression > 1.0 && CriticalCompression < 4.0) + ReturnBW = + dml_min( + ReturnBW, + 4.0 * ReturnBandwidthToDCN + * (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024 + * mode_lib->vba.ReturnBusWidth + * mode_lib->vba.DCFCLK + * mode_lib->vba.UrgentLatencyPixelDataOnly + / dml_pow( + (ReturnBandwidthToDCN + * mode_lib->vba.UrgentLatencyPixelDataOnly + + (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024), + 2)); + + return ReturnBW; +} + +static unsigned int dscceComputeDelay( + unsigned int bpc, + double bpp, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat) +{ + // valid bpc = source bits per component in the set of {8, 10, 12} + // valid bpp = increments of 1/16 of a bit + // min = 6/7/8 in N420/N422/444, respectively + // max = such that compression is 1:1 + //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) + //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} + + // fixed value + unsigned int rcModelSize = 8192; + + // N422/N420 operate at 2 pixels per clock + unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, l, + Delay, pixels; + + if (pixelFormat == dm_n422 || pixelFormat == dm_420) + pixelsPerClock = 2; + // #all other modes operate at 1 pixel per clock + else + pixelsPerClock = 1; + + //initial transmit delay as per PPS + initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock); + + //compute ssm delay + if (bpc == 8) + D = 81; + else if (bpc == 10) + D = 89; + else + D = 113; + + //divide by pixel per cycle to compute slice width as seen by DSC + w = sliceWidth / pixelsPerClock; + + //422 mode has an additional cycle of delay + if (pixelFormat == dm_s422) + s = 1; + else + s = 0; + + //main calculation for the dscce + ix = initalXmitDelay + 45; + wx = (w + 2) / 3; + p = 3 * wx - w; + l0 = ix / w; + a = ix + p * l0; + ax = (a + 2) / 3 + D + 6 + 1; + l = (ax + wx - 1) / wx; + if ((ix % w) == 0 && p != 0) + lstall = 1; + else + lstall = 0; + Delay = l * wx * (numSlices - 1) + ax + s + lstall + 22; + + //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels + pixels = Delay * 3 * pixelsPerClock; + return pixels; +} + +static unsigned int dscComputeDelay(enum output_format_class pixelFormat) +{ + unsigned int Delay = 0; + + if (pixelFormat == dm_420) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc gets pixels every other cycle + Delay = Delay + 2; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc gets pixels every other cycle + Delay = Delay + 13; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc gets pixels every other cycle + Delay = Delay + 3; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else if (pixelFormat == dm_n422) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 25; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 10; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc - output serializer + Delay = Delay + 1; + // dscc - cdc uncertainty + Delay = Delay + 2; + // sft + Delay = Delay + 1; + } + + return Delay; +} + +static bool CalculateDelayAfterScaler( + struct display_mode_lib *mode_lib, + double ReturnBW, + double ReadBandwidthPlaneLuma, + double ReadBandwidthPlaneChroma, + double TotalDataReadBandwidth, + double DisplayPipeLineDeliveryTimeLuma, + double DisplayPipeLineDeliveryTimeChroma, + double DPPCLK, + double DISPCLK, + double PixelClock, + unsigned int DSCDelay, + unsigned int DPPPerPlane, + bool ScalerEnabled, + unsigned int NumberOfCursors, + double DPPCLKDelaySubtotal, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCFormater, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int ScalerRecoutWidth, + enum output_format_class OutputFormat, + unsigned int HTotal, + unsigned int SwathWidthSingleDPPY, + double BytePerPixelDETY, + double BytePerPixelDETC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + double *DSTXAfterScaler, + double *DSTYAfterScaler + ) +{ + unsigned int DPPCycles, DISPCLKCycles; + double DataFabricLineDeliveryTimeLuma; + double DataFabricLineDeliveryTimeChroma; + double DSTTotalPixelsAfterScaler; + + DataFabricLineDeliveryTimeLuma = SwathWidthSingleDPPY * SwathHeightY * dml_ceil(BytePerPixelDETY, 1) / (mode_lib->vba.ReturnBW * ReadBandwidthPlaneLuma / TotalDataReadBandwidth); + mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max(mode_lib->vba.LastPixelOfLineExtraWatermark, DataFabricLineDeliveryTimeLuma - DisplayPipeLineDeliveryTimeLuma); + + if (BytePerPixelDETC != 0) { + DataFabricLineDeliveryTimeChroma = SwathWidthSingleDPPY / 2 * SwathHeightC * dml_ceil(BytePerPixelDETC, 2) / (mode_lib->vba.ReturnBW * ReadBandwidthPlaneChroma / TotalDataReadBandwidth); + mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max(mode_lib->vba.LastPixelOfLineExtraWatermark, DataFabricLineDeliveryTimeChroma - DisplayPipeLineDeliveryTimeChroma); + } + + if (ScalerEnabled) + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL; + else + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly; + + DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + NumberOfCursors * DPPCLKDelayCNVCCursor; + + DISPCLKCycles = DISPCLKDelaySubtotal; + + if (DPPCLK == 0.0 || DISPCLK == 0.0) + return true; + + *DSTXAfterScaler = DPPCycles * PixelClock / DPPCLK + DISPCLKCycles * PixelClock / DISPCLK + + DSCDelay; + + if (DPPPerPlane > 1) + *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth; + + if (OutputFormat == dm_420 || (Interlace && ProgressiveToInterlaceUnitInOPP)) + *DSTYAfterScaler = 1; + else + *DSTYAfterScaler = 0; + + DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * HTotal)) + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / HTotal, 1); + *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * HTotal)); + + return true; +} + +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double DPPCLK, + double DISPCLK, + double PixelClock, + double DCFCLKDeepSleep, + unsigned int DPPPerPlane, + unsigned int NumberOfCursors, + unsigned int VBlank, + unsigned int HTotal, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int PageTableLevels, + bool GPUVMEnable, + bool DynamicMetadataEnable, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + bool DCCEnable, + double UrgentLatencyPixelDataOnly, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double BytePerPixelDETY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + double BytePerPixelDETC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + bool XFCEnabled, + double XFCRemoteSurfaceFlipDelay, + bool InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + double DSTXAfterScaler, + double DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBW, + double *Tno_bw, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix) +{ + bool MyError = false; + double TotalRepeaterDelayTime; + double Tdm, LineTime, Tsetup; + double dst_y_prefetch_equ; + double Tsw_oto; + double prefetch_bw_oto; + double Tvm_oto; + double Tr0_oto; + double Tpre_oto; + double dst_y_prefetch_oto; + double TimeForFetchingMetaPTE = 0; + double TimeForFetchingRowInVBlank = 0; + double LinesToRequestPrefetchPixelData = 0; + + *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / DPPCLK + 3.0 / DISPCLK); + *VUpdateWidthPix = (14.0 / DCFCLKDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) + * PixelClock; + + *VReadyOffsetPix = dml_max( + 150.0 / DPPCLK, + TotalRepeaterDelayTime + 20.0 / DCFCLKDeepSleep + 10.0 / DPPCLK) + * PixelClock; + + Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + + LineTime = (double) HTotal / PixelClock; + + if (DynamicMetadataEnable) { + double Tdmbf, Tdmec, Tdmsks; + + Tdm = dml_max(0.0, UrgentExtraLatency - TCalc); + Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; + Tdmec = LineTime; + if (DynamicMetadataLinesBeforeActiveRequired == 0) + Tdmsks = VBlank * LineTime / 2.0; + else + Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime; + if (InterlaceEnable && !ProgressiveToInterlaceUnitInOPP) + Tdmsks = Tdmsks / 2; + if (VStartup * LineTime + < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) { + MyError = true; + } + } else + Tdm = 0; + + if (GPUVMEnable) { + if (PageTableLevels == 4) + *Tno_bw = UrgentExtraLatency + UrgentLatencyPixelDataOnly; + else if (PageTableLevels == 3) + *Tno_bw = UrgentExtraLatency; + else + *Tno_bw = 0; + } else if (DCCEnable) + *Tno_bw = LineTime; + else + *Tno_bw = LineTime / 4; + + dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime + - (Tsetup + Tdm) / LineTime + - (DSTYAfterScaler + DSTXAfterScaler / HTotal); + + Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; + + prefetch_bw_oto = (MetaRowByte + PixelPTEBytesPerRow + + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) + + PrefetchSourceLinesC * SwathWidthY / 2 * dml_ceil(BytePerPixelDETC, 2)) + / Tsw_oto; + + if (GPUVMEnable == true) { + Tvm_oto = + dml_max( + *Tno_bw + PDEAndMetaPTEBytesFrame / prefetch_bw_oto, + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (PageTableLevels + - 1), + LineTime / 4.0)); + } else + Tvm_oto = LineTime / 4.0; + + if ((GPUVMEnable == true || DCCEnable == true)) { + Tr0_oto = dml_max( + (MetaRowByte + PixelPTEBytesPerRow) / prefetch_bw_oto, + dml_max(UrgentLatencyPixelDataOnly, dml_max(LineTime - Tvm_oto, LineTime / 4))); + } else + Tr0_oto = LineTime - Tvm_oto; + + Tpre_oto = Tvm_oto + Tr0_oto + Tsw_oto; + + dst_y_prefetch_oto = Tpre_oto / LineTime; + + if (dst_y_prefetch_oto < dst_y_prefetch_equ) + *DestinationLinesForPrefetch = dst_y_prefetch_oto; + else + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + + *DestinationLinesForPrefetch = dml_floor(4.0 * (*DestinationLinesForPrefetch + 0.125), 1) + / 4; + + dml_print("DML: VStartup: %d\n", VStartup); + dml_print("DML: TCalc: %f\n", TCalc); + dml_print("DML: TWait: %f\n", TWait); + dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay); + dml_print("DML: LineTime: %f\n", LineTime); + dml_print("DML: Tsetup: %f\n", Tsetup); + dml_print("DML: Tdm: %f\n", Tdm); + dml_print("DML: DSTYAfterScaler: %f\n", DSTYAfterScaler); + dml_print("DML: DSTXAfterScaler: %f\n", DSTXAfterScaler); + dml_print("DML: HTotal: %d\n", HTotal); + + *PrefetchBandwidth = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + if (*DestinationLinesForPrefetch > 1) { + *PrefetchBandwidth = (PDEAndMetaPTEBytesFrame + 2 * MetaRowByte + + 2 * PixelPTEBytesPerRow + + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) + + PrefetchSourceLinesC * SwathWidthY / 2 + * dml_ceil(BytePerPixelDETC, 2)) + / (*DestinationLinesForPrefetch * LineTime - *Tno_bw); + if (GPUVMEnable) { + TimeForFetchingMetaPTE = + dml_max( + *Tno_bw + + (double) PDEAndMetaPTEBytesFrame + / *PrefetchBandwidth, + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (PageTableLevels + - 1), + LineTime / 4)); + } else { + if (NumberOfCursors > 0 || XFCEnabled) + TimeForFetchingMetaPTE = LineTime / 4; + else + TimeForFetchingMetaPTE = 0.0; + } + + if ((GPUVMEnable == true || DCCEnable == true)) { + TimeForFetchingRowInVBlank = + dml_max( + (MetaRowByte + PixelPTEBytesPerRow) + / *PrefetchBandwidth, + dml_max( + UrgentLatencyPixelDataOnly, + dml_max( + LineTime + - TimeForFetchingMetaPTE, + LineTime + / 4.0))); + } else { + if (NumberOfCursors > 0 || XFCEnabled) + TimeForFetchingRowInVBlank = LineTime - TimeForFetchingMetaPTE; + else + TimeForFetchingRowInVBlank = 0.0; + } + + *DestinationLinesToRequestVMInVBlank = dml_floor( + 4.0 * (TimeForFetchingMetaPTE / LineTime + 0.125), + 1) / 4.0; + + *DestinationLinesToRequestRowInVBlank = dml_floor( + 4.0 * (TimeForFetchingRowInVBlank / LineTime + 0.125), + 1) / 4.0; + + LinesToRequestPrefetchPixelData = + *DestinationLinesForPrefetch + - ((NumberOfCursors > 0 || GPUVMEnable + || DCCEnable) ? + (*DestinationLinesToRequestVMInVBlank + + *DestinationLinesToRequestRowInVBlank) : + 0.0); + + if (LinesToRequestPrefetchPixelData > 0) { + + *VRatioPrefetchY = (double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData; + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + *VRatioPrefetchY = + dml_max( + (double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData, + (double) MaxNumSwathY + * SwathHeightY + / (LinesToRequestPrefetchPixelData + - (VInitPreFillY + - 3.0) + / 2.0)); + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + } else { + MyError = true; + *VRatioPrefetchY = 0; + } + } + + *VRatioPrefetchC = (double) PrefetchSourceLinesC + / LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + + if ((SwathHeightC > 4)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + *VRatioPrefetchC = + dml_max( + *VRatioPrefetchC, + (double) MaxNumSwathC + * SwathHeightC + / (LinesToRequestPrefetchPixelData + - (VInitPreFillC + - 3.0) + / 2.0)); + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + } else { + MyError = true; + *VRatioPrefetchC = 0; + } + } + + *RequiredPrefetchPixDataBW = + DPPPerPlane + * ((double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData + * dml_ceil( + BytePerPixelDETY, + 1) + + (double) PrefetchSourceLinesC + / LinesToRequestPrefetchPixelData + * dml_ceil( + BytePerPixelDETC, + 2) + / 2) + * SwathWidthY / LineTime; + } else { + MyError = true; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + } + + } else { + MyError = true; + } + + if (MyError) { + *PrefetchBandwidth = 0; + TimeForFetchingMetaPTE = 0; + TimeForFetchingRowInVBlank = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *DestinationLinesForPrefetch = 0; + LinesToRequestPrefetchPixelData = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + } + + return MyError; +} + +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); +} + +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4 / Clock, 1); +} + +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath) +{ + unsigned int MaxPartialSwath; + + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); + else + *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); + + if (!mode_lib->vba.IgnoreViewportPositioning) { + + *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) + % SwathHeight; + MaxPartialSwath = dml_max(1U, MaxPartialSwath); + + } else { + + if (ViewportYStart != 0) + dml_print( + "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); + + *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) + % SwathHeight; + } + + return *MaxNumSwath * SwathHeight + MaxPartialSwath; +} + +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + unsigned int SwathWidth, + bool GPUVMEnable, + unsigned int VMMPageSize, + unsigned int PTEBufferSizeInRequestsLuma, + unsigned int PDEProcessingBufIn64KBReqs, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + unsigned int *dpte_row_height, + unsigned int *meta_row_height) +{ + unsigned int MetaRequestHeight; + unsigned int MetaRequestWidth; + unsigned int MetaSurfWidth; + unsigned int MetaSurfHeight; + unsigned int MPDEBytesFrame; + unsigned int MetaPTEBytesFrame; + unsigned int DCCMetaSurfaceBytes; + + unsigned int MacroTileSizeBytes; + unsigned int MacroTileHeight; + unsigned int DPDE0BytesFrame; + unsigned int ExtraDPDEBytesFrame; + unsigned int PDEAndMetaPTEBytesFrame; + + if (DCCEnable == true) { + MetaRequestHeight = 8 * BlockHeight256Bytes; + MetaRequestWidth = 8 * BlockWidth256Bytes; + if (ScanDirection == dm_horz) { + *meta_row_height = MetaRequestHeight; + MetaSurfWidth = dml_ceil((double) SwathWidth - 1, MetaRequestWidth) + + MetaRequestWidth; + *MetaRowByte = MetaSurfWidth * MetaRequestHeight * BytePerPixel / 256.0; + } else { + *meta_row_height = MetaRequestWidth; + MetaSurfHeight = dml_ceil((double) SwathWidth - 1, MetaRequestHeight) + + MetaRequestHeight; + *MetaRowByte = MetaSurfHeight * MetaRequestWidth * BytePerPixel / 256.0; + } + if (ScanDirection == dm_horz) { + DCCMetaSurfaceBytes = DCCMetaPitch + * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + + 64 * BlockHeight256Bytes) * BytePerPixel + / 256; + } else { + DCCMetaSurfaceBytes = DCCMetaPitch + * (dml_ceil( + (double) ViewportHeight - 1, + 64 * BlockHeight256Bytes) + + 64 * BlockHeight256Bytes) * BytePerPixel + / 256; + } + if (GPUVMEnable == true) { + MetaPTEBytesFrame = (dml_ceil( + (double) (DCCMetaSurfaceBytes - VMMPageSize) + / (8 * VMMPageSize), + 1) + 1) * 64; + MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1); + } else { + MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + } + } else { + MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + *MetaRowByte = 0; + } + + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + MacroTileSizeBytes = 256; + MacroTileHeight = BlockHeight256Bytes; + } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x + || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) { + MacroTileSizeBytes = 4096; + MacroTileHeight = 4 * BlockHeight256Bytes; + } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t + || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d + || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x + || SurfaceTiling == dm_sw_64kb_r_x) { + MacroTileSizeBytes = 65536; + MacroTileHeight = 16 * BlockHeight256Bytes; + } else { + MacroTileSizeBytes = 262144; + MacroTileHeight = 32 * BlockHeight256Bytes; + } + *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; + + if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) { + if (ScanDirection == dm_horz) { + DPDE0BytesFrame = + 64 + * (dml_ceil( + ((Pitch + * (dml_ceil( + ViewportHeight + - 1, + MacroTileHeight) + + MacroTileHeight) + * BytePerPixel) + - MacroTileSizeBytes) + / (8 + * 2097152), + 1) + 1); + } else { + DPDE0BytesFrame = + 64 + * (dml_ceil( + ((Pitch + * (dml_ceil( + (double) SwathWidth + - 1, + MacroTileHeight) + + MacroTileHeight) + * BytePerPixel) + - MacroTileSizeBytes) + / (8 + * 2097152), + 1) + 1); + } + ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2); + } else { + DPDE0BytesFrame = 0; + ExtraDPDEBytesFrame = 0; + } + + PDEAndMetaPTEBytesFrame = MetaPTEBytesFrame + MPDEBytesFrame + DPDE0BytesFrame + + ExtraDPDEBytesFrame; + + if (GPUVMEnable == true) { + unsigned int PTERequestSize; + unsigned int PixelPTEReqHeight; + unsigned int PixelPTEReqWidth; + double FractionOfPTEReturnDrop; + unsigned int EffectivePDEProcessingBufIn64KBReqs; + + if (SurfaceTiling == dm_sw_linear) { + PixelPTEReqHeight = 1; + PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel; + PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } else if (MacroTileSizeBytes == 4096) { + PixelPTEReqHeight = MacroTileHeight; + PixelPTEReqWidth = 8 * *MacroTileWidth; + PTERequestSize = 64; + if (ScanDirection == dm_horz) + FractionOfPTEReturnDrop = 0; + else + FractionOfPTEReturnDrop = 7 / 8; + } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) { + PixelPTEReqHeight = 16 * BlockHeight256Bytes; + PixelPTEReqWidth = 16 * BlockWidth256Bytes; + PTERequestSize = 128; + FractionOfPTEReturnDrop = 0; + } else { + PixelPTEReqHeight = MacroTileHeight; + PixelPTEReqWidth = 8 * *MacroTileWidth; + PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } + + if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) + EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs / 2; + else + EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs; + + if (SurfaceTiling == dm_sw_linear) { + *dpte_row_height = + dml_min( + 128, + 1 + << (unsigned int) dml_floor( + dml_log2( + dml_min( + (double) PTEBufferSizeInRequestsLuma + * PixelPTEReqWidth, + EffectivePDEProcessingBufIn64KBReqs + * 65536.0 + / BytePerPixel) + / Pitch), + 1)); + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil( + (double) (Pitch * *dpte_row_height - 1) + / PixelPTEReqWidth, + 1) + 1); + } else if (ScanDirection == dm_horz) { + *dpte_row_height = PixelPTEReqHeight; + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil(((double) SwathWidth - 1) / PixelPTEReqWidth, 1) + + 1); + } else { + *dpte_row_height = dml_min(PixelPTEReqWidth, *MacroTileWidth); + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil( + ((double) SwathWidth - 1) + / PixelPTEReqHeight, + 1) + 1); + } + if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) + <= 64 * PTEBufferSizeInRequestsLuma) { + *PTEBufferSizeNotExceeded = true; + } else { + *PTEBufferSizeNotExceeded = false; + } + } else { + *PixelPTEBytesPerRow = 0; + *PTEBufferSizeNotExceeded = true; + } + + return PDEAndMetaPTEBytesFrame; +} + +static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib) +{ + unsigned int j, k; + + mode_lib->vba.WritebackDISPCLK = 0.0; + mode_lib->vba.DISPCLKWithRamping = 0; + mode_lib->vba.DISPCLKWithoutRamping = 0; + mode_lib->vba.GlobalDPPCLK = 0.0; + + // dml_ml->vba.DISPCLK and dml_ml->vba.DPPCLK Calculation + // + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.WritebackEnable[k]) { + mode_lib->vba.WritebackDISPCLK = + dml_max( + mode_lib->vba.WritebackDISPCLK, + CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.WritebackChromaLineBufferWidth)); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.HRatio[k] > 1) { + mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / dml_ceil( + mode_lib->vba.htaps[k] + / 6.0, + 1)); + } else { + mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + + mode_lib->vba.DPPCLKUsingSingleDPPLuma = + mode_lib->vba.PixelClock[k] + * dml_max( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + dml_max( + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k], + 1.0)); + + if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6) + && mode_lib->vba.DPPCLKUsingSingleDPPLuma + < 2 * mode_lib->vba.PixelClock[k]) { + mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k]; + } + + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = 0.0; + mode_lib->vba.DPPCLKUsingSingleDPP[k] = + mode_lib->vba.DPPCLKUsingSingleDPPLuma; + } else { + if (mode_lib->vba.HRatio[k] > 1) { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = + dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / 2 + / dml_ceil( + mode_lib->vba.HTAPsChroma[k] + / 6.0, + 1.0)); + } else { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + mode_lib->vba.DPPCLKUsingSingleDPPChroma = + mode_lib->vba.PixelClock[k] + * dml_max( + mode_lib->vba.VTAPsChroma[k] + / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k] + / 2), + dml_max( + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / 4 + / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k], + 1.0)); + + if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6) + && mode_lib->vba.DPPCLKUsingSingleDPPChroma + < 2 * mode_lib->vba.PixelClock[k]) { + mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2 + * mode_lib->vba.PixelClock[k]; + } + + mode_lib->vba.DPPCLKUsingSingleDPP[k] = dml_max( + mode_lib->vba.DPPCLKUsingSingleDPPLuma, + mode_lib->vba.DPPCLKUsingSingleDPPChroma); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] != k) + continue; + if (mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DISPCLKWithRamping = + dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.PixelClock[k] / 2 + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100) + * (1 + + mode_lib->vba.DISPCLKRampingMargin + / 100)); + mode_lib->vba.DISPCLKWithoutRamping = + dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.PixelClock[k] / 2 + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100)); + } else if (!mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DISPCLKWithRamping = + dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.PixelClock[k] + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100) + * (1 + + mode_lib->vba.DISPCLKRampingMargin + / 100)); + mode_lib->vba.DISPCLKWithoutRamping = + dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.PixelClock[k] + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100)); + } + } + + mode_lib->vba.DISPCLKWithRamping = dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.WritebackDISPCLK); + mode_lib->vba.DISPCLKWithoutRamping = dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.WritebackDISPCLK); + + ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0); + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states].dispclk_mhz, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { + mode_lib->vba.DISPCLK_calculated = + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity; + } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { + mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity; + } else { + mode_lib->vba.DISPCLK_calculated = + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity; + } + DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.DPPPerPlane[k] == 0) { + mode_lib->vba.DPPCLK_calculated[k] = 0; + } else { + mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.DPPCLKUsingSingleDPP[k] + / mode_lib->vba.DPPPerPlane[k] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); + } + mode_lib->vba.GlobalDPPCLK = dml_max( + mode_lib->vba.GlobalDPPCLK, + mode_lib->vba.DPPCLK_calculated[k]); + } + mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp( + mode_lib->vba.GlobalDPPCLK, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255 + * dml_ceil( + mode_lib->vba.DPPCLK_calculated[k] * 255 + / mode_lib->vba.GlobalDPPCLK, + 1); + DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]); + } + + // Urgent Watermark + mode_lib->vba.DCCEnabledAnyPlane = false; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + if (mode_lib->vba.DCCEnable[k]) + mode_lib->vba.DCCEnabledAnyPlane = true; + + mode_lib->vba.ReturnBandwidthToDCN = dml_min( + mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, + mode_lib->vba.FabricAndDRAMBandwidth * 1000) + * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; + + mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBandwidthToDCN; + mode_lib->vba.ReturnBW = adjust_ReturnBW( + mode_lib, + mode_lib->vba.ReturnBW, + mode_lib->vba.DCCEnabledAnyPlane, + mode_lib->vba.ReturnBandwidthToDCN); + + // Let's do this calculation again?? + mode_lib->vba.ReturnBandwidthToDCN = dml_min( + mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, + mode_lib->vba.FabricAndDRAMBandwidth * 1000); + mode_lib->vba.ReturnBW = adjust_ReturnBW( + mode_lib, + mode_lib->vba.ReturnBW, + mode_lib->vba.DCCEnabledAnyPlane, + mode_lib->vba.ReturnBandwidthToDCN); + + DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK); + DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN); + DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + bool MainPlaneDoesODMCombine = false; + + if (mode_lib->vba.SourceScan[k] == dm_horz) + mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k]; + else + mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; + + if (mode_lib->vba.ODMCombineEnabled[k] == true) + MainPlaneDoesODMCombine = true; + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) + if (mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.ODMCombineEnabled[j] == true) + MainPlaneDoesODMCombine = true; + + if (MainPlaneDoesODMCombine == true) + mode_lib->vba.SwathWidthY[k] = dml_min( + (double) mode_lib->vba.SwathWidthSingleDPPY[k], + dml_round( + mode_lib->vba.HActive[k] / 2.0 + * mode_lib->vba.HRatio[k])); + else { + if (mode_lib->vba.DPPPerPlane[k] == 0) { + mode_lib->vba.SwathWidthY[k] = 0; + } else { + mode_lib->vba.SwathWidthY[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + / mode_lib->vba.DPPPerPlane[k]; + } + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + mode_lib->vba.BytePerPixelDETY[k] = 8; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + mode_lib->vba.BytePerPixelDETY[k] = 4; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + mode_lib->vba.BytePerPixelDETY[k] = 2; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { + mode_lib->vba.BytePerPixelDETY[k] = 1; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + mode_lib->vba.BytePerPixelDETY[k] = 1; + mode_lib->vba.BytePerPixelDETC[k] = 2; + } else { // dm_420_10 + mode_lib->vba.BytePerPixelDETY[k] = 4.0 / 3.0; + mode_lib->vba.BytePerPixelDETC[k] = 8.0 / 3.0; + } + } + + mode_lib->vba.TotalDataReadBandwidth = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.ReadBandwidthPlaneLuma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k]; + mode_lib->vba.ReadBandwidthPlaneChroma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + / 2 * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k] / 2; + DTRACE( + " read_bw[%i] = %fBps", + k, + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k]); + mode_lib->vba.TotalDataReadBandwidth += mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k]; + } + + mode_lib->vba.TotalDCCActiveDPP = 0; + mode_lib->vba.TotalActiveDPP = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + + mode_lib->vba.DPPPerPlane[k]; + if (mode_lib->vba.DCCEnable[k]) + mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + + mode_lib->vba.DPPPerPlane[k]; + } + + mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency = + (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK + + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly + * mode_lib->vba.NumberOfChannels + / mode_lib->vba.ReturnBW; + + mode_lib->vba.LastPixelOfLineExtraWatermark = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.VRatio[k] <= 1.0) + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] = + (double) mode_lib->vba.SwathWidthY[k] + * mode_lib->vba.DPPPerPlane[k] + / mode_lib->vba.HRatio[k] + / mode_lib->vba.PixelClock[k]; + else + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] = + (double) mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / mode_lib->vba.DPPCLK[k]; + + if (mode_lib->vba.BytePerPixelDETC[k] == 0) + mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = 0.0; + else if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) + mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = + mode_lib->vba.SwathWidthY[k] / 2.0 + * mode_lib->vba.DPPPerPlane[k] + / (mode_lib->vba.HRatio[k] / 2.0) + / mode_lib->vba.PixelClock[k]; + else + mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = + mode_lib->vba.SwathWidthY[k] / 2.0 + / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] + / mode_lib->vba.DPPCLK[k]; + } + + mode_lib->vba.UrgentExtraLatency = mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency + + (mode_lib->vba.TotalActiveDPP * mode_lib->vba.PixelChunkSizeInKByte + + mode_lib->vba.TotalDCCActiveDPP + * mode_lib->vba.MetaChunkSize) * 1024.0 + / mode_lib->vba.ReturnBW; + + if (mode_lib->vba.GPUVMEnable) + mode_lib->vba.UrgentExtraLatency += mode_lib->vba.TotalActiveDPP + * mode_lib->vba.PTEGroupSize / mode_lib->vba.ReturnBW; + + mode_lib->vba.UrgentWatermark = mode_lib->vba.UrgentLatencyPixelDataOnly + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency; + + DTRACE(" urgent_extra_latency = %fus", mode_lib->vba.UrgentExtraLatency); + DTRACE(" wm_urgent = %fus", mode_lib->vba.UrgentWatermark); + + mode_lib->vba.UrgentLatency = mode_lib->vba.UrgentLatencyPixelDataOnly; + + mode_lib->vba.TotalActiveWriteback = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.WritebackEnable[k]) + mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + mode_lib->vba.ActiveWritebacksPerPlane[k]; + } + + if (mode_lib->vba.TotalActiveWriteback <= 1) + mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency; + else + mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency + + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 + / mode_lib->vba.SOCCLK; + + DTRACE(" wm_wb_urgent = %fus", mode_lib->vba.WritebackUrgentWatermark); + + // NB P-State/DRAM Clock Change Watermark + mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.UrgentWatermark; + + DTRACE(" wm_pstate_change = %fus", mode_lib->vba.DRAMClockChangeWatermark); + + DTRACE(" calculating wb pstate watermark"); + DTRACE(" total wb outputs %d", mode_lib->vba.TotalActiveWriteback); + DTRACE(" socclk frequency %f Mhz", mode_lib->vba.SOCCLK); + + if (mode_lib->vba.TotalActiveWriteback <= 1) + mode_lib->vba.WritebackDRAMClockChangeWatermark = + mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.WritebackLatency; + else + mode_lib->vba.WritebackDRAMClockChangeWatermark = + mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.WritebackLatency + + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 + / mode_lib->vba.SOCCLK; + + DTRACE(" wm_wb_pstate %fus", mode_lib->vba.WritebackDRAMClockChangeWatermark); + + // Stutter Efficiency + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.LinesInDETY[k] = mode_lib->vba.DETBufferSizeY[k] + / mode_lib->vba.BytePerPixelDETY[k] / mode_lib->vba.SwathWidthY[k]; + mode_lib->vba.LinesInDETYRoundedDownToSwath[k] = dml_floor( + mode_lib->vba.LinesInDETY[k], + mode_lib->vba.SwathHeightY[k]); + mode_lib->vba.FullDETBufferingTimeY[k] = + mode_lib->vba.LinesInDETYRoundedDownToSwath[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k]; + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.LinesInDETC[k] = mode_lib->vba.DETBufferSizeC[k] + / mode_lib->vba.BytePerPixelDETC[k] + / (mode_lib->vba.SwathWidthY[k] / 2); + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = dml_floor( + mode_lib->vba.LinesInDETC[k], + mode_lib->vba.SwathHeightC[k]); + mode_lib->vba.FullDETBufferingTimeC[k] = + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / (mode_lib->vba.VRatio[k] / 2); + } else { + mode_lib->vba.LinesInDETC[k] = 0; + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = 0; + mode_lib->vba.FullDETBufferingTimeC[k] = 999999; + } + } + + mode_lib->vba.MinFullDETBufferingTime = 999999.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.FullDETBufferingTimeY[k] + < mode_lib->vba.MinFullDETBufferingTime) { + mode_lib->vba.MinFullDETBufferingTime = + mode_lib->vba.FullDETBufferingTimeY[k]; + mode_lib->vba.FrameTimeForMinFullDETBufferingTime = + (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } + if (mode_lib->vba.FullDETBufferingTimeC[k] + < mode_lib->vba.MinFullDETBufferingTime) { + mode_lib->vba.MinFullDETBufferingTime = + mode_lib->vba.FullDETBufferingTimeC[k]; + mode_lib->vba.FrameTimeForMinFullDETBufferingTime = + (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } + } + + mode_lib->vba.AverageReadBandwidthGBytePerSecond = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.DCCEnable[k]) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / mode_lib->vba.DCCRate[k] + / 1000 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / mode_lib->vba.DCCRate[k] + / 1000; + } else { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000; + } + if (mode_lib->vba.DCCEnable[k]) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 / 256 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000 / 256; + } + if (mode_lib->vba.GPUVMEnable) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 / 512 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000 / 512; + } + } + + mode_lib->vba.PartOfBurstThatFitsInROB = + dml_min( + mode_lib->vba.MinFullDETBufferingTime + * mode_lib->vba.TotalDataReadBandwidth, + mode_lib->vba.ROBBufferSizeInKByte * 1024 + * mode_lib->vba.TotalDataReadBandwidth + / (mode_lib->vba.AverageReadBandwidthGBytePerSecond + * 1000)); + mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB + * (mode_lib->vba.AverageReadBandwidthGBytePerSecond * 1000) + / mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.ReturnBW + + (mode_lib->vba.MinFullDETBufferingTime + * mode_lib->vba.TotalDataReadBandwidth + - mode_lib->vba.PartOfBurstThatFitsInROB) + / (mode_lib->vba.DCFCLK * 64); + if (mode_lib->vba.TotalActiveWriteback == 0) { + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1 + - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime) + / mode_lib->vba.MinFullDETBufferingTime) * 100; + } else { + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0; + } + + mode_lib->vba.SmallestVBlank = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { + mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.VBlankTime = 0; + } + mode_lib->vba.SmallestVBlank = dml_min( + mode_lib->vba.SmallestVBlank, + mode_lib->vba.VBlankTime); + } + + mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100 + * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime + - mode_lib->vba.SmallestVBlank) + + mode_lib->vba.SmallestVBlank) + / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100; + + // dml_ml->vba.DCFCLK Deep Sleep + mode_lib->vba.DCFCLKDeepSleep = 8.0; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) { + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = + dml_max( + 1.1 * mode_lib->vba.SwathWidthY[k] + * dml_ceil( + mode_lib->vba.BytePerPixelDETY[k], + 1) / 32 + / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], + 1.1 * mode_lib->vba.SwathWidthY[k] / 2.0 + * dml_ceil( + mode_lib->vba.BytePerPixelDETC[k], + 2) / 32 + / mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]); + } else + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * mode_lib->vba.SwathWidthY[k] + * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) / 64.0 + / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k]; + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max( + mode_lib->vba.DCFCLKDeepSleepPerPlane[k], + mode_lib->vba.PixelClock[k] / 16.0); + mode_lib->vba.DCFCLKDeepSleep = dml_max( + mode_lib->vba.DCFCLKDeepSleep, + mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); + + DTRACE( + " dcfclk_deepsleep_per_plane[%i] = %fMHz", + k, + mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); + } + + DTRACE(" dcfclk_deepsleep_mhz = %fMHz", mode_lib->vba.DCFCLKDeepSleep); + + // Stutter Watermark + mode_lib->vba.StutterExitWatermark = mode_lib->vba.SRExitTime + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency + 10 / mode_lib->vba.DCFCLKDeepSleep; + mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.SREnterPlusExitTime + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency; + + DTRACE(" wm_cstate_exit = %fus", mode_lib->vba.StutterExitWatermark); + DTRACE(" wm_cstate_enter_exit = %fus", mode_lib->vba.StutterEnterPlusExitWatermark); + + // Urgent Latency Supported + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.EffectiveDETPlusLBLinesLuma = + dml_floor( + mode_lib->vba.LinesInDETY[k] + + dml_min( + mode_lib->vba.LinesInDETY[k] + * mode_lib->vba.DPPCLK[k] + * mode_lib->vba.BytePerPixelDETY[k] + * mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]), + (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesLuma), + mode_lib->vba.SwathHeightY[k]); + + mode_lib->vba.UrgentLatencySupportUsLuma = mode_lib->vba.EffectiveDETPlusLBLinesLuma + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k] + - mode_lib->vba.EffectiveDETPlusLBLinesLuma + * mode_lib->vba.SwathWidthY[k] + * mode_lib->vba.BytePerPixelDETY[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]); + + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.EffectiveDETPlusLBLinesChroma = + dml_floor( + mode_lib->vba.LinesInDETC[k] + + dml_min( + mode_lib->vba.LinesInDETC[k] + * mode_lib->vba.DPPCLK[k] + * mode_lib->vba.BytePerPixelDETC[k] + * mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]), + (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesChroma), + mode_lib->vba.SwathHeightC[k]); + mode_lib->vba.UrgentLatencySupportUsChroma = + mode_lib->vba.EffectiveDETPlusLBLinesChroma + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / (mode_lib->vba.VRatio[k] / 2) + - mode_lib->vba.EffectiveDETPlusLBLinesChroma + * (mode_lib->vba.SwathWidthY[k] + / 2) + * mode_lib->vba.BytePerPixelDETC[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]); + mode_lib->vba.UrgentLatencySupportUs[k] = dml_min( + mode_lib->vba.UrgentLatencySupportUsLuma, + mode_lib->vba.UrgentLatencySupportUsChroma); + } else { + mode_lib->vba.UrgentLatencySupportUs[k] = + mode_lib->vba.UrgentLatencySupportUsLuma; + } + } + + mode_lib->vba.MinUrgentLatencySupportUs = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.MinUrgentLatencySupportUs = dml_min( + mode_lib->vba.MinUrgentLatencySupportUs, + mode_lib->vba.UrgentLatencySupportUs[k]); + } + + // Non-Urgent Latency Tolerance + mode_lib->vba.NonUrgentLatencyTolerance = mode_lib->vba.MinUrgentLatencySupportUs + - mode_lib->vba.UrgentWatermark; + + // DSCCLK + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { + mode_lib->vba.DSCCLK_calculated[k] = 0.0; + } else { + if (mode_lib->vba.OutputFormat[k] == dm_420 + || mode_lib->vba.OutputFormat[k] == dm_n422) + mode_lib->vba.DSCFormatFactor = 2; + else + mode_lib->vba.DSCFormatFactor = 1; + if (mode_lib->vba.ODMCombineEnabled[k]) + mode_lib->vba.DSCCLK_calculated[k] = + mode_lib->vba.PixelClockBackEnd[k] / 6 + / mode_lib->vba.DSCFormatFactor + / (1 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100); + else + mode_lib->vba.DSCCLK_calculated[k] = + mode_lib->vba.PixelClockBackEnd[k] / 3 + / mode_lib->vba.DSCFormatFactor + / (1 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100); + } + } + + // DSC Delay + // TODO + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + double bpp = mode_lib->vba.OutputBpp[k]; + unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k]; + + if (mode_lib->vba.DSCEnabled[k] && bpp != 0) { + if (!mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DSCDelay[k] = + dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + bpp, + dml_ceil( + (double) mode_lib->vba.HActive[k] + / mode_lib->vba.NumberOfDSCSlices[k], + 1), + slices, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k]); + } else { + mode_lib->vba.DSCDelay[k] = + 2 + * (dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + bpp, + dml_ceil( + (double) mode_lib->vba.HActive[k] + / mode_lib->vba.NumberOfDSCSlices[k], + 1), + slices / 2.0, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k])); + } + mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.PixelClockBackEnd[k]; + } else { + mode_lib->vba.DSCDelay[k] = 0; + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes + if (j != k && mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.DSCEnabled[j]) + mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[j]; + + // Prefetch + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + unsigned int PDEAndMetaPTEBytesFrameY; + unsigned int PixelPTEBytesPerRowY; + unsigned int MetaRowByteY; + unsigned int MetaRowByteC; + unsigned int PDEAndMetaPTEBytesFrameC; + unsigned int PixelPTEBytesPerRowC; + + Calculate256BBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2), + &mode_lib->vba.BlockHeight256BytesY[k], + &mode_lib->vba.BlockHeight256BytesC[k], + &mode_lib->vba.BlockWidth256BytesY[k], + &mode_lib->vba.BlockWidth256BytesC[k]); + PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.BlockHeight256BytesY[k], + mode_lib->vba.BlockWidth256BytesY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k], + mode_lib->vba.ViewportHeight[k], + mode_lib->vba.SwathWidthY[k], + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchY[k], + mode_lib->vba.DCCMetaPitchY[k], + &mode_lib->vba.MacroTileWidthY[k], + &MetaRowByteY, + &PixelPTEBytesPerRowY, + &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], + &mode_lib->vba.dpte_row_height[k], + &mode_lib->vba.meta_row_height[k]); + mode_lib->vba.PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.vtaps[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.ViewportYStartY[k], + &mode_lib->vba.VInitPreFillY[k], + &mode_lib->vba.MaxNumSwathY[k]); + + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) { + PDEAndMetaPTEBytesFrameC = + CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.BlockHeight256BytesC[k], + mode_lib->vba.BlockWidth256BytesC[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil( + mode_lib->vba.BytePerPixelDETC[k], + 2), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k] / 2, + mode_lib->vba.ViewportHeight[k] / 2, + mode_lib->vba.SwathWidthY[k] / 2, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchC[k], + 0, + &mode_lib->vba.MacroTileWidthC[k], + &MetaRowByteC, + &PixelPTEBytesPerRowC, + &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], + &mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_height_chroma[k]); + mode_lib->vba.PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k] / 2, + mode_lib->vba.VTAPsChroma[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightC[k], + mode_lib->vba.ViewportYStartC[k], + &mode_lib->vba.VInitPreFillC[k], + &mode_lib->vba.MaxNumSwathC[k]); + } else { + PixelPTEBytesPerRowC = 0; + PDEAndMetaPTEBytesFrameC = 0; + MetaRowByteC = 0; + mode_lib->vba.MaxNumSwathC[k] = 0; + mode_lib->vba.PrefetchSourceLinesC[k] = 0; + } + + mode_lib->vba.PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; + mode_lib->vba.PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + + PDEAndMetaPTEBytesFrameC; + mode_lib->vba.MetaRowByte[k] = MetaRowByteY + MetaRowByteC; + + CalculateActiveRowBandwidth( + mode_lib->vba.GPUVMEnable, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + MetaRowByteY, + MetaRowByteC, + mode_lib->vba.meta_row_height[k], + mode_lib->vba.meta_row_height_chroma[k], + PixelPTEBytesPerRowY, + PixelPTEBytesPerRowC, + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_bw[k], + &mode_lib->vba.dpte_row_bw[k], + &mode_lib->vba.qual_row_bw[k]); + } + + mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k]) + / mode_lib->vba.DISPCLK; + } else + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[j] == k + && mode_lib->vba.WritebackEnable[j] == true) { + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + dml_max( + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k], + mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[j], + mode_lib->vba.WritebackHRatio[j], + mode_lib->vba.WritebackVRatio[j], + mode_lib->vba.WritebackLumaHTaps[j], + mode_lib->vba.WritebackLumaVTaps[j], + mode_lib->vba.WritebackChromaHTaps[j], + mode_lib->vba.WritebackChromaVTaps[j], + mode_lib->vba.WritebackDestinationWidth[j]) + / mode_lib->vba.DISPCLK); + } + } + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) + if (mode_lib->vba.BlendingAndTiming[k] == j) + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][j]; + + mode_lib->vba.VStartupLines = 13; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.MaxVStartupLines[k] = + mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + - dml_max( + 1.0, + dml_ceil( + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1)); + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + mode_lib->vba.MaximumMaxVStartupLines = dml_max( + mode_lib->vba.MaximumMaxVStartupLines, + mode_lib->vba.MaxVStartupLines[k]); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.cursor_bw[k] = 0.0; + for (j = 0; j < mode_lib->vba.NumberOfCursors[k]; ++j) + mode_lib->vba.cursor_bw[k] += mode_lib->vba.CursorWidth[k][j] + * mode_lib->vba.CursorBPP[k][j] / 8.0 + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k]; + } + + do { + double MaxTotalRDBandwidth = 0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThan4 = false; + bool prefetch_vm_bw_valid = true; + bool prefetch_row_bw_valid = true; + double TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.SREnterPlusExitTime); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.XFCEnabled[k] == true) { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = + CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.SwathWidthY[k], + dml_ceil( + mode_lib->vba.BytePerPixelDETY[k], + 1), + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TCalc, + TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + } else { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0; + } + + CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBW, mode_lib->vba.ReadBandwidthPlaneLuma[k], mode_lib->vba.ReadBandwidthPlaneChroma[k], mode_lib->vba.TotalDataReadBandwidth, + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k], + mode_lib->vba.DPPCLK[k], mode_lib->vba.DISPCLK, mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelay[k], mode_lib->vba.DPPPerPlane[k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal, + mode_lib->vba.SwathWidthY[k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.HTotal[k], + mode_lib->vba.SwathWidthSingleDPPY[k], mode_lib->vba.BytePerPixelDETY[k], mode_lib->vba.BytePerPixelDETC[k], mode_lib->vba.SwathHeightY[k], mode_lib->vba.SwathHeightC[k], mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, &mode_lib->vba.DSTXAfterScaler[k], &mode_lib->vba.DSTYAfterScaler[k]); + + mode_lib->vba.ErrorResult[k] = + CalculatePrefetchSchedule( + mode_lib, + mode_lib->vba.DPPCLK[k], + mode_lib->vba.DISPCLK, + mode_lib->vba.PixelClock[k], + mode_lib->vba.DCFCLKDeepSleep, + mode_lib->vba.DPPPerPlane[k], + mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.MaxInterDCNTileRepeaters, + dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[k]), + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.DynamicMetadataEnable[k], + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], + mode_lib->vba.DynamicMetadataTransmittedBytes[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.UrgentExtraLatency, + mode_lib->vba.TCalc, + mode_lib->vba.PDEAndMetaPTEBytesFrame[k], + mode_lib->vba.MetaRowByte[k], + mode_lib->vba.PixelPTEBytesPerRow[k], + mode_lib->vba.PrefetchSourceLinesY[k], + mode_lib->vba.SwathWidthY[k], + mode_lib->vba.BytePerPixelDETY[k], + mode_lib->vba.VInitPreFillY[k], + mode_lib->vba.MaxNumSwathY[k], + mode_lib->vba.PrefetchSourceLinesC[k], + mode_lib->vba.BytePerPixelDETC[k], + mode_lib->vba.VInitPreFillC[k], + mode_lib->vba.MaxNumSwathC[k], + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.SwathHeightC[k], + TWait, + mode_lib->vba.XFCEnabled[k], + mode_lib->vba.XFCRemoteSurfaceFlipDelay, + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.DSTXAfterScaler[k], + mode_lib->vba.DSTYAfterScaler[k], + &mode_lib->vba.DestinationLinesForPrefetch[k], + &mode_lib->vba.PrefetchBandwidth[k], + &mode_lib->vba.DestinationLinesToRequestVMInVBlank[k], + &mode_lib->vba.DestinationLinesToRequestRowInVBlank[k], + &mode_lib->vba.VRatioPrefetchY[k], + &mode_lib->vba.VRatioPrefetchC[k], + &mode_lib->vba.RequiredPrefetchPixDataBWLuma[k], + &mode_lib->vba.Tno_bw[k], + &mode_lib->vba.VUpdateOffsetPix[k], + &mode_lib->vba.VUpdateWidthPix[k], + &mode_lib->vba.VReadyOffsetPix[k]); + + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.VStartup[k] = dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[k]); + if (mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata + != 0) { + mode_lib->vba.VStartup[k] = + mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; + } + } else { + mode_lib->vba.VStartup[k] = + dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + + if (mode_lib->vba.PDEAndMetaPTEBytesFrame[k] == 0) + mode_lib->vba.prefetch_vm_bw[k] = 0; + else if (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] > 0) { + mode_lib->vba.prefetch_vm_bw[k] = + (double) mode_lib->vba.PDEAndMetaPTEBytesFrame[k] + / (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } else { + mode_lib->vba.prefetch_vm_bw[k] = 0; + prefetch_vm_bw_valid = false; + } + if (mode_lib->vba.MetaRowByte[k] + mode_lib->vba.PixelPTEBytesPerRow[k] + == 0) + mode_lib->vba.prefetch_row_bw[k] = 0; + else if (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] > 0) { + mode_lib->vba.prefetch_row_bw[k] = + (double) (mode_lib->vba.MetaRowByte[k] + + mode_lib->vba.PixelPTEBytesPerRow[k]) + / (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } else { + mode_lib->vba.prefetch_row_bw[k] = 0; + prefetch_row_bw_valid = false; + } + + MaxTotalRDBandwidth = + MaxTotalRDBandwidth + mode_lib->vba.cursor_bw[k] + + dml_max( + mode_lib->vba.prefetch_vm_bw[k], + dml_max( + mode_lib->vba.prefetch_row_bw[k], + dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k], + mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]) + + mode_lib->vba.meta_row_bw[k] + + mode_lib->vba.dpte_row_bw[k])); + + if (mode_lib->vba.DestinationLinesForPrefetch[k] < 2) + DestinationLineTimesForPrefetchLessThan2 = true; + if (mode_lib->vba.VRatioPrefetchY[k] > 4 + || mode_lib->vba.VRatioPrefetchC[k] > 4) + VRatioPrefetchMoreThan4 = true; + } + + if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && prefetch_vm_bw_valid + && prefetch_row_bw_valid && !VRatioPrefetchMoreThan4 + && !DestinationLineTimesForPrefetchLessThan2) + mode_lib->vba.PrefetchModeSupported = true; + else { + mode_lib->vba.PrefetchModeSupported = false; + dml_print( + "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n"); + } + + if (mode_lib->vba.PrefetchModeSupported == true) { + double final_flip_bw[DC__NUM_DPP__MAX]; + unsigned int ImmediateFlipBytes[DC__NUM_DPP__MAX]; + double total_dcn_read_bw_with_flip = 0; + + mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.BandwidthAvailableForImmediateFlip + - mode_lib->vba.cursor_bw[k] + - dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + + mode_lib->vba.qual_row_bw[k], + mode_lib->vba.PrefetchBandwidth[k]); + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + ImmediateFlipBytes[k] = 0; + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + ImmediateFlipBytes[k] = + mode_lib->vba.PDEAndMetaPTEBytesFrame[k] + + mode_lib->vba.MetaRowByte[k] + + mode_lib->vba.PixelPTEBytesPerRow[k]; + } + } + mode_lib->vba.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + ImmediateFlipBytes[k]; + } + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + CalculateFlipSchedule( + mode_lib, + mode_lib->vba.UrgentExtraLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + ImmediateFlipBytes[k], + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.Tno_bw[k], + mode_lib->vba.PDEAndMetaPTEBytesFrame[k], + mode_lib->vba.MetaRowByte[k], + mode_lib->vba.PixelPTEBytesPerRow[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.meta_row_height[k], + mode_lib->vba.qual_row_bw[k], + &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], + &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], + &final_flip_bw[k], + &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + total_dcn_read_bw_with_flip = + total_dcn_read_bw_with_flip + + mode_lib->vba.cursor_bw[k] + + dml_max( + mode_lib->vba.prefetch_vm_bw[k], + dml_max( + mode_lib->vba.prefetch_row_bw[k], + final_flip_bw[k] + + dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k], + mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]))); + } + mode_lib->vba.ImmediateFlipSupported = true; + if (total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) { + mode_lib->vba.ImmediateFlipSupported = false; + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->vba.ImmediateFlipSupported = false; + } + } + } else { + mode_lib->vba.ImmediateFlipSupported = false; + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ErrorResult[k]) { + mode_lib->vba.PrefetchModeSupported = false; + dml_print( + "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n"); + } + } + + mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1; + } while (!((mode_lib->vba.PrefetchModeSupported + && (!mode_lib->vba.ImmediateFlipSupport + || mode_lib->vba.ImmediateFlipSupported)) + || mode_lib->vba.MaximumMaxVStartupLines < mode_lib->vba.VStartupLines)); + + //Display Pipeline Delivery Time in Prefetch + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.VRatioPrefetchY[k] <= 1) { + mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] * mode_lib->vba.DPPPerPlane[k] + / mode_lib->vba.HRatio[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / mode_lib->vba.DPPCLK[k]; + } + if (mode_lib->vba.BytePerPixelDETC[k] == 0) { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (mode_lib->vba.VRatioPrefetchC[k] <= 1) { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + * mode_lib->vba.DPPPerPlane[k] + / mode_lib->vba.HRatio[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / mode_lib->vba.DPPCLK[k]; + } + } + } + + // Min TTUVBlank + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = true; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; + mode_lib->vba.MinTTUVBlank[k] = dml_max( + mode_lib->vba.DRAMClockChangeWatermark, + dml_max( + mode_lib->vba.StutterEnterPlusExitWatermark, + mode_lib->vba.UrgentWatermark)); + } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; + mode_lib->vba.MinTTUVBlank[k] = dml_max( + mode_lib->vba.StutterEnterPlusExitWatermark, + mode_lib->vba.UrgentWatermark); + } else { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = false; + mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark; + } + if (!mode_lib->vba.DynamicMetadataEnable[k]) + mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.TCalc + + mode_lib->vba.MinTTUVBlank[k]; + } + + // DCC Configuration + mode_lib->vba.ActiveDPPs = 0; + // NB P-State/DRAM Clock Change Support + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.ActiveDPPs = mode_lib->vba.ActiveDPPs + mode_lib->vba.DPPPerPlane[k]; + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double DPPOutputBufferLinesY; + double DPPOutputBufferLinesC; + double DPPOPPBufferingY; + double MaxDETBufferingTimeY; + double ActiveDRAMClockChangeLatencyMarginY; + + mode_lib->vba.LBLatencyHidingSourceLinesY = + dml_min( + mode_lib->vba.MaxLineBufferLines, + (unsigned int) dml_floor( + (double) mode_lib->vba.LineBufferSize + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.SwathWidthY[k] + / dml_max( + mode_lib->vba.HRatio[k], + 1.0)), + 1)) - (mode_lib->vba.vtaps[k] - 1); + + mode_lib->vba.LBLatencyHidingSourceLinesC = + dml_min( + mode_lib->vba.MaxLineBufferLines, + (unsigned int) dml_floor( + (double) mode_lib->vba.LineBufferSize + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.SwathWidthY[k] + / 2.0 + / dml_max( + mode_lib->vba.HRatio[k] + / 2, + 1.0)), + 1)) + - (mode_lib->vba.VTAPsChroma[k] - 1); + + EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY + / mode_lib->vba.VRatio[k] + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); + + EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC + / (mode_lib->vba.VRatio[k] / 2) + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); + + if (mode_lib->vba.SwathWidthY[k] > 2 * mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesY = mode_lib->vba.DPPOutputBufferPixels + / mode_lib->vba.SwathWidthY[k]; + } else if (mode_lib->vba.SwathWidthY[k] > mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesY = 0.5; + } else { + DPPOutputBufferLinesY = 1; + } + + if (mode_lib->vba.SwathWidthY[k] / 2 > 2 * mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesC = mode_lib->vba.DPPOutputBufferPixels + / (mode_lib->vba.SwathWidthY[k] / 2); + } else if (mode_lib->vba.SwathWidthY[k] / 2 > mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesC = 0.5; + } else { + DPPOutputBufferLinesC = 1; + } + + DPPOPPBufferingY = (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * (DPPOutputBufferLinesY + mode_lib->vba.OPPOutputBufferLines); + MaxDETBufferingTimeY = mode_lib->vba.FullDETBufferingTimeY[k] + + (mode_lib->vba.LinesInDETY[k] + - mode_lib->vba.LinesInDETYRoundedDownToSwath[k]) + / mode_lib->vba.SwathHeightY[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + + ActiveDRAMClockChangeLatencyMarginY = DPPOPPBufferingY + EffectiveLBLatencyHidingY + + MaxDETBufferingTimeY - mode_lib->vba.DRAMClockChangeWatermark; + + if (mode_lib->vba.ActiveDPPs > 1) { + ActiveDRAMClockChangeLatencyMarginY = + ActiveDRAMClockChangeLatencyMarginY + - (1 - 1 / (mode_lib->vba.ActiveDPPs - 1)) + * mode_lib->vba.SwathHeightY[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } + + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + double DPPOPPBufferingC = (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + * (DPPOutputBufferLinesC + + mode_lib->vba.OPPOutputBufferLines); + double MaxDETBufferingTimeC = + mode_lib->vba.FullDETBufferingTimeC[k] + + (mode_lib->vba.LinesInDETC[k] + - mode_lib->vba.LinesInDETCRoundedDownToSwath[k]) + / mode_lib->vba.SwathHeightC[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + double ActiveDRAMClockChangeLatencyMarginC = DPPOPPBufferingC + + EffectiveLBLatencyHidingC + MaxDETBufferingTimeC + - mode_lib->vba.DRAMClockChangeWatermark; + + if (mode_lib->vba.ActiveDPPs > 1) { + ActiveDRAMClockChangeLatencyMarginC = + ActiveDRAMClockChangeLatencyMarginC + - (1 + - 1 + / (mode_lib->vba.ActiveDPPs + - 1)) + * mode_lib->vba.SwathHeightC[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( + ActiveDRAMClockChangeLatencyMarginY, + ActiveDRAMClockChangeLatencyMarginC); + } else { + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = + ActiveDRAMClockChangeLatencyMarginY; + } + + if (mode_lib->vba.WritebackEnable[k]) { + double WritebackDRAMClockChangeLatencyMargin; + + if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + WritebackDRAMClockChangeLatencyMargin = + (double) (mode_lib->vba.WritebackInterfaceLumaBufferSize + + mode_lib->vba.WritebackInterfaceChromaBufferSize) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + * 4) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } else if (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + WritebackDRAMClockChangeLatencyMargin = + dml_min( + (double) mode_lib->vba.WritebackInterfaceLumaBufferSize + * 8.0 / 10, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize + * 8 / 10) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k])) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } else { + WritebackDRAMClockChangeLatencyMargin = + dml_min( + (double) mode_lib->vba.WritebackInterfaceLumaBufferSize, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k])) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], + WritebackDRAMClockChangeLatencyMargin); + } + } + + mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] + < mode_lib->vba.MinActiveDRAMClockChangeMargin) { + mode_lib->vba.MinActiveDRAMClockChangeMargin = + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; + } + } + + mode_lib->vba.MinActiveDRAMClockChangeLatencySupported = + mode_lib->vba.MinActiveDRAMClockChangeMargin + + mode_lib->vba.DRAMClockChangeLatency; + + if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; + } else { + if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vblank; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (!mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k]) { + mode_lib->vba.DRAMClockChangeSupport[0][0] = + dm_dram_clock_change_unsupported; + } + } + } else { + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_unsupported; + } + } + for (k = 0; k <= mode_lib->vba.soc.num_states; k++) + for (j = 0; j < 2; j++) + mode_lib->vba.DRAMClockChangeSupport[k][j] = mode_lib->vba.DRAMClockChangeSupport[0][0]; + + //XFC Parameters: + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.XFCEnabled[k] == true) { + double TWait; + + mode_lib->vba.XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset; + mode_lib->vba.XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth; + mode_lib->vba.XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset; + TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.SREnterPlusExitTime); + mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.SwathWidthY[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TCalc, + TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = + dml_floor( + mode_lib->vba.XFCRemoteSurfaceFlipDelay + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.XFCTransferDelay[k] = + dml_ceil( + mode_lib->vba.XFCBusTransportTime + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.XFCPrechargeDelay[k] = + dml_ceil( + (mode_lib->vba.XFCBusTransportTime + + mode_lib->vba.TInitXFill + + mode_lib->vba.TslvChk) + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance + * mode_lib->vba.SrcActiveDrainRate; + mode_lib->vba.FinalFillMargin = + (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k] + * mode_lib->vba.SrcActiveDrainRate + + mode_lib->vba.XFCFillConstant; + mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay + * mode_lib->vba.SrcActiveDrainRate + + mode_lib->vba.FinalFillMargin; + mode_lib->vba.RemainingFillLevel = dml_max( + 0.0, + mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel); + mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel + / (mode_lib->vba.SrcActiveDrainRate + * mode_lib->vba.XFCFillBWOverhead / 100); + mode_lib->vba.XFCPrefetchMargin[k] = + mode_lib->vba.XFCRemoteSurfaceFlipDelay + + mode_lib->vba.TFinalxFill + + (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.XFCSlaveVUpdateOffset[k] = 0; + mode_lib->vba.XFCSlaveVupdateWidth[k] = 0; + mode_lib->vba.XFCSlaveVReadyOffset[k] = 0; + mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = 0; + mode_lib->vba.XFCPrechargeDelay[k] = 0; + mode_lib->vba.XFCTransferDelay[k] = 0; + mode_lib->vba.XFCPrefetchMargin[k] = 0; + } + } + { + unsigned int VStartupMargin = 0; + bool FirstMainPlane = true; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + unsigned int Margin = (mode_lib->vba.MaxVStartupLines[k] - mode_lib->vba.VStartup[k]) + * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]; + + if (FirstMainPlane) { + VStartupMargin = Margin; + FirstMainPlane = false; + } else + VStartupMargin = dml_min(VStartupMargin, Margin); + } + + if (mode_lib->vba.UseMaximumVStartup) { + if (mode_lib->vba.VTotal_Max[k] == mode_lib->vba.VTotal[k]) { + //only use max vstart if it is not drr or lateflip. + mode_lib->vba.VStartup[k] = mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]; + } + } + } +} +} + +static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib) +{ + double BytePerPixDETY; + double BytePerPixDETC; + double Read256BytesBlockHeightY; + double Read256BytesBlockHeightC; + double Read256BytesBlockWidthY; + double Read256BytesBlockWidthC; + double MaximumSwathHeightY; + double MaximumSwathHeightC; + double MinimumSwathHeightY; + double MinimumSwathHeightC; + double SwathWidth; + double SwathWidthGranularityY; + double SwathWidthGranularityC; + double RoundedUpMaxSwathSizeBytesY; + double RoundedUpMaxSwathSizeBytesC; + unsigned int j, k; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + bool MainPlaneDoesODMCombine = false; + + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + BytePerPixDETY = 8; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + BytePerPixDETY = 4; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + BytePerPixDETY = 2; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { + BytePerPixDETY = 1; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + BytePerPixDETY = 1; + BytePerPixDETC = 2; + } else { + BytePerPixDETY = 4.0 / 3.0; + BytePerPixDETC = 8.0 / 3.0; + } + + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + Read256BytesBlockHeightY = 1; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + Read256BytesBlockHeightY = 4; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + Read256BytesBlockHeightY = 8; + } else { + Read256BytesBlockHeightY = 16; + } + Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) + / Read256BytesBlockHeightY; + Read256BytesBlockHeightC = 0; + Read256BytesBlockWidthC = 0; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + Read256BytesBlockHeightY = 1; + Read256BytesBlockHeightC = 1; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + Read256BytesBlockHeightY = 16; + Read256BytesBlockHeightC = 8; + } else { + Read256BytesBlockHeightY = 8; + Read256BytesBlockHeightC = 8; + } + Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) + / Read256BytesBlockHeightY; + Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2) + / Read256BytesBlockHeightC; + } + + if (mode_lib->vba.SourceScan[k] == dm_horz) { + MaximumSwathHeightY = Read256BytesBlockHeightY; + MaximumSwathHeightC = Read256BytesBlockHeightC; + } else { + MaximumSwathHeightY = Read256BytesBlockWidthY; + MaximumSwathHeightC = Read256BytesBlockWidthC; + } + + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_t + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s_x) + && mode_lib->vba.SourceScan[k] == dm_horz)) { + MinimumSwathHeightY = MaximumSwathHeightY; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 + && mode_lib->vba.SourceScan[k] != dm_horz) { + MinimumSwathHeightY = MaximumSwathHeightY; + } else { + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; + } + MinimumSwathHeightC = MaximumSwathHeightC; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + MinimumSwathHeightY = MaximumSwathHeightY; + MinimumSwathHeightC = MaximumSwathHeightC; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 + && mode_lib->vba.SourceScan[k] == dm_horz) { + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; + MinimumSwathHeightC = MaximumSwathHeightC; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + && mode_lib->vba.SourceScan[k] == dm_horz) { + MinimumSwathHeightC = MaximumSwathHeightC / 2.0; + MinimumSwathHeightY = MaximumSwathHeightY; + } else { + MinimumSwathHeightY = MaximumSwathHeightY; + MinimumSwathHeightC = MaximumSwathHeightC; + } + } + + if (mode_lib->vba.SourceScan[k] == dm_horz) { + SwathWidth = mode_lib->vba.ViewportWidth[k]; + } else { + SwathWidth = mode_lib->vba.ViewportHeight[k]; + } + + if (mode_lib->vba.ODMCombineEnabled[k] == true) { + MainPlaneDoesODMCombine = true; + } + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.ODMCombineEnabled[j] == true) { + MainPlaneDoesODMCombine = true; + } + } + + if (MainPlaneDoesODMCombine == true) { + SwathWidth = dml_min( + SwathWidth, + mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]); + } else { + if (mode_lib->vba.DPPPerPlane[k] == 0) + SwathWidth = 0; + else + SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k]; + } + + SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY; + RoundedUpMaxSwathSizeBytesY = (dml_ceil( + (double) (SwathWidth - 1), + SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY + * MaximumSwathHeightY; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256) + + 256; + } + if (MaximumSwathHeightC > 0) { + SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2) + / MaximumSwathHeightC; + RoundedUpMaxSwathSizeBytesC = (dml_ceil( + (double) (SwathWidth / 2.0 - 1), + SwathWidthGranularityC) + SwathWidthGranularityC) + * BytePerPixDETC * MaximumSwathHeightC; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesC = dml_ceil( + RoundedUpMaxSwathSizeBytesC, + 256) + 256; + } + } else + RoundedUpMaxSwathSizeBytesC = 0.0; + + if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC + <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) { + mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY; + mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC; + } else { + mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY; + mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC; + } + + if (mode_lib->vba.SwathHeightC[k] == 0) { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte * 1024; + mode_lib->vba.DETBufferSizeC[k] = 0; + } else if (mode_lib->vba.SwathHeightY[k] <= mode_lib->vba.SwathHeightC[k]) { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte + * 1024.0 / 2; + mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte + * 1024.0 / 2; + } else { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte + * 1024.0 * 2 / 3; + mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte + * 1024.0 / 3; + } + } +} + +static double CalculateTWait( + unsigned int PrefetchMode, + double DRAMClockChangeLatency, + double UrgentLatencyPixelDataOnly, + double SREnterPlusExitTime) +{ + if (PrefetchMode == 0) { + return dml_max( + DRAMClockChangeLatency + UrgentLatencyPixelDataOnly, + dml_max(SREnterPlusExitTime, UrgentLatencyPixelDataOnly)); + } else if (PrefetchMode == 1) { + return dml_max(SREnterPlusExitTime, UrgentLatencyPixelDataOnly); + } else { + return UrgentLatencyPixelDataOnly; + } +} + +static double CalculateRemoteSurfaceFlipDelay( + struct display_mode_lib *mode_lib, + double VRatio, + double SwathWidth, + double Bpp, + double LineTime, + double XFCTSlvVupdateOffset, + double XFCTSlvVupdateWidth, + double XFCTSlvVreadyOffset, + double XFCXBUFLatencyTolerance, + double XFCFillBWOverhead, + double XFCSlvChunkSize, + double XFCBusTransportTime, + double TCalc, + double TWait, + double *SrcActiveDrainRate, + double *TInitXFill, + double *TslvChk) +{ + double TSlvSetup, AvgfillRate, result; + + *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime; + TSlvSetup = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset; + *TInitXFill = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100); + AvgfillRate = *SrcActiveDrainRate * (1 + XFCFillBWOverhead / 100); + *TslvChk = XFCSlvChunkSize / AvgfillRate; + dml_print( + "DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n", + *SrcActiveDrainRate); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", TSlvSetup); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", AvgfillRate); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk); + result = 2 * XFCBusTransportTime + TSlvSetup + TCalc + TWait + *TslvChk + *TInitXFill; // TODO: This doesn't seem to match programming guide + dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", result); + return result; +} + +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackLumaHTaps, + unsigned int WritebackLumaVTaps, + unsigned int WritebackChromaHTaps, + unsigned int WritebackChromaVTaps, + unsigned int WritebackDestinationWidth) +{ + double CalculateWriteBackDelay = + dml_max( + dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio, + WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1) + * dml_ceil( + WritebackDestinationWidth + / 4.0, + 1) + + dml_ceil(1.0 / WritebackVRatio, 1) + * (dml_ceil( + WritebackLumaVTaps + / 4.0, + 1) + 4)); + + if (WritebackPixelFormat != dm_444_32) { + CalculateWriteBackDelay = + dml_max( + CalculateWriteBackDelay, + dml_max( + dml_ceil( + WritebackChromaHTaps + / 2.0, + 1) + / (2 + * WritebackHRatio), + WritebackChromaVTaps + * dml_ceil( + 1 + / (2 + * WritebackVRatio), + 1) + * dml_ceil( + WritebackDestinationWidth + / 2.0 + / 2.0, + 1) + + dml_ceil( + 1 + / (2 + * WritebackVRatio), + 1) + * (dml_ceil( + WritebackChromaVTaps + / 4.0, + 1) + + 4))); + } + return CalculateWriteBackDelay; +} + +static void CalculateActiveRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw, + double *qual_row_bw) +{ + if (DCCEnable != true) { + *meta_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + + VRatio / 2 * MetaRowByteChroma + / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatio / 2 * PixelPTEBytesPerRowChroma + / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } + + if ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10)) { + *qual_row_bw = *meta_row_bw + *dpte_row_bw; + } else { + *qual_row_bw = 0; + } +} + +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + double UrgentExtraLatency, + double UrgentLatencyPixelDataOnly, + unsigned int GPUVMMaxPageTableLevels, + bool GPUVMEnable, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + unsigned int ImmediateFlipBytes, + double LineTime, + double VRatio, + double Tno_bw, + double PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + double qual_row_bw, + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe) +{ + double min_row_time = 0.0; + + if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *DestinationLinesToRequestVMInImmediateFlip = 0.0; + *DestinationLinesToRequestRowInImmediateFlip = 0.0; + *final_flip_bw = qual_row_bw; + *ImmediateFlipSupportedForPipe = true; + } else { + double TimeForFetchingMetaPTEImmediateFlip; + double TimeForFetchingRowInVBlankImmediateFlip; + + if (GPUVMEnable == true) { + mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip + * ImmediateFlipBytes / TotImmediateFlipBytes; + TimeForFetchingMetaPTEImmediateFlip = + dml_max( + Tno_bw + + PDEAndMetaPTEBytesFrame + / mode_lib->vba.ImmediateFlipBW[0], + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (GPUVMMaxPageTableLevels + - 1), + LineTime / 4.0)); + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + } + + *DestinationLinesToRequestVMInImmediateFlip = dml_floor( + 4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime + 0.125), + 1) / 4.0; + + if ((GPUVMEnable == true || DCCEnable == true)) { + mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip + * ImmediateFlipBytes / TotImmediateFlipBytes; + TimeForFetchingRowInVBlankImmediateFlip = dml_max( + (MetaRowByte + PixelPTEBytesPerRow) + / mode_lib->vba.ImmediateFlipBW[0], + dml_max(UrgentLatencyPixelDataOnly, LineTime / 4.0)); + } else { + TimeForFetchingRowInVBlankImmediateFlip = 0; + } + + *DestinationLinesToRequestRowInImmediateFlip = dml_floor( + 4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime + 0.125), + 1) / 4.0; + + if (GPUVMEnable == true) { + *final_flip_bw = + dml_max( + PDEAndMetaPTEBytesFrame + / (*DestinationLinesToRequestVMInImmediateFlip + * LineTime), + (MetaRowByte + PixelPTEBytesPerRow) + / (TimeForFetchingRowInVBlankImmediateFlip + * LineTime)); + } else if (MetaRowByte + PixelPTEBytesPerRow > 0) { + *final_flip_bw = (MetaRowByte + PixelPTEBytesPerRow) + / (TimeForFetchingRowInVBlankImmediateFlip * LineTime); + } else { + *final_flip_bw = 0; + } + + if (GPUVMEnable && !DCCEnable) + min_row_time = dpte_row_height * LineTime / VRatio; + else if (!GPUVMEnable && DCCEnable) + min_row_time = meta_row_height * LineTime / VRatio; + else + min_row_time = dml_min(dpte_row_height, meta_row_height) * LineTime + / VRatio; + + if (*DestinationLinesToRequestVMInImmediateFlip >= 8 + || *DestinationLinesToRequestRowInImmediateFlip >= 16 + || TimeForFetchingMetaPTEImmediateFlip + + 2 * TimeForFetchingRowInVBlankImmediateFlip + > min_row_time) + *ImmediateFlipSupportedForPipe = false; + else + *ImmediateFlipSupportedForPipe = true; + } +} + +static unsigned int TruncToValidBPP( + double DecimalBPP, + bool DSCEnabled, + enum output_encoder_class Output, + enum output_format_class Format, + unsigned int DSCInputBitPerComponent) +{ + if (Output == dm_hdmi) { + if (Format == dm_420) { + if (DecimalBPP >= 18) + return 18; + else if (DecimalBPP >= 15) + return 15; + else if (DecimalBPP >= 12) + return 12; + else + return BPP_INVALID; + } else if (Format == dm_444) { + if (DecimalBPP >= 36) + return 36; + else if (DecimalBPP >= 30) + return 30; + else if (DecimalBPP >= 24) + return 24; + else if (DecimalBPP >= 18) + return 18; + else + return BPP_INVALID; + } else { + if (DecimalBPP / 1.5 >= 24) + return 24; + else if (DecimalBPP / 1.5 >= 20) + return 20; + else if (DecimalBPP / 1.5 >= 16) + return 16; + else + return BPP_INVALID; + } + } else { + if (DSCEnabled) { + if (Format == dm_420) { + if (DecimalBPP < 6) + return BPP_INVALID; + else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1 / 16) + return 1.5 * DSCInputBitPerComponent - 1 / 16; + else + return dml_floor(16 * DecimalBPP, 1) / 16; + } else if (Format == dm_n422) { + if (DecimalBPP < 7) + return BPP_INVALID; + else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1 / 16) + return 2 * DSCInputBitPerComponent - 1 / 16; + else + return dml_floor(16 * DecimalBPP, 1) / 16; + } else { + if (DecimalBPP < 8) + return BPP_INVALID; + else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1 / 16) + return 3 * DSCInputBitPerComponent - 1 / 16; + else + return dml_floor(16 * DecimalBPP, 1) / 16; + } + } else if (Format == dm_420) { + if (DecimalBPP >= 18) + return 18; + else if (DecimalBPP >= 15) + return 15; + else if (DecimalBPP >= 12) + return 12; + else + return BPP_INVALID; + } else if (Format == dm_s422 || Format == dm_n422) { + if (DecimalBPP >= 24) + return 24; + else if (DecimalBPP >= 20) + return 20; + else if (DecimalBPP >= 16) + return 16; + else + return BPP_INVALID; + } else { + if (DecimalBPP >= 36) + return 36; + else if (DecimalBPP >= 30) + return 30; + else if (DecimalBPP >= 24) + return 24; + else if (DecimalBPP >= 18) + return 18; + else + return BPP_INVALID; + } + } +} + +void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) +{ + struct vba_vars_st *locals = &mode_lib->vba; + + int i; + unsigned int j, k, m; + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + + mode_lib->vba.ScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.ScalerEnabled[k] == false + && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) + || mode_lib->vba.HRatio[k] != 1.0 + || mode_lib->vba.htaps[k] != 1.0 + || mode_lib->vba.VRatio[k] != 1.0 + || mode_lib->vba.vtaps[k] != 1.0)) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 + || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0 + || (mode_lib->vba.htaps[k] > 1.0 + && (mode_lib->vba.htaps[k] % 2) == 1) + || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio + || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio + || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k] + || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k] + || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8 + && (mode_lib->vba.HRatio[k] / 2.0 + > mode_lib->vba.HTAPsChroma[k] + || mode_lib->vba.VRatio[k] / 2.0 + > mode_lib->vba.VTAPsChroma[k]))) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } + } + /*Source Format, Pixel Format and Scan Support Check*/ + + mode_lib->vba.SourceFormatPixelAndScanSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + && mode_lib->vba.SourceScan[k] != dm_horz) + || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x) + && mode_lib->vba.SourcePixelFormat[k] != dm_444_64) + || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x + && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_10)) + || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_gfx7_2d_thin_lvp) + && !((mode_lib->vba.SourcePixelFormat[k] + == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] + == dm_444_32) + && mode_lib->vba.SourceScan[k] + == dm_horz + && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp + == true + && mode_lib->vba.DCCEnable[k] + == false)) + || (mode_lib->vba.DCCEnable[k] == true + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_linear + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_10)))) { + mode_lib->vba.SourceFormatPixelAndScanSupport = false; + } + } + /*Bandwidth Support Check*/ + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + locals->BytePerPixelInDETY[k] = 8.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + locals->BytePerPixelInDETY[k] = 4.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { + locals->BytePerPixelInDETY[k] = 2.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { + locals->BytePerPixelInDETY[k] = 1.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + locals->BytePerPixelInDETY[k] = 1.0; + locals->BytePerPixelInDETC[k] = 2.0; + } else { + locals->BytePerPixelInDETY[k] = 4.0 / 3; + locals->BytePerPixelInDETC[k] = 8.0 / 3; + } + if (mode_lib->vba.SourceScan[k] == dm_horz) { + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k]; + } else { + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k]; + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; + locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0; + locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k]; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 4.0; + } else if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 3.0; + } else if (mode_lib->vba.WritebackEnable[k] == true) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 1.5; + } else { + locals->WriteBandwidth[k] = 0.0; + } + } + mode_lib->vba.DCCEnabledInAnyPlane = false; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.DCCEnable[k] == true) { + mode_lib->vba.DCCEnabledInAnyPlane = true; + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->FabricAndDRAMBandwidthPerState[i] = dml_min( + mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels + * mode_lib->vba.DRAMChannelWidth, + mode_lib->vba.FabricClockPerState[i] + * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000; + locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth * locals->DCFCLKPerState[i], + locals->FabricAndDRAMBandwidthPerState[i] * 1000) + * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; + + locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState; + + if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { + locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / + ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] + * locals->ReturnBusWidth / 4) + locals->UrgentLatency))); + } + locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * + locals->UrgentLatency / (locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); + + if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { + locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + 4 * locals->ReturnBWToDCNPerState * + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / + dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024), 2)); + } + + locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth * + locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000); + + if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { + locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / + ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] + * locals->ReturnBusWidth / 4) + locals->UrgentLatency))); + } + locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * + locals->UrgentLatency / (locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); + + if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { + locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + 4 * locals->ReturnBWToDCNPerState * + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / + dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024), 2)); + } + } + /*Writeback Latency support check*/ + + mode_lib->vba.WritebackLatencySupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + if (locals->WriteBandwidth[k] + > (mode_lib->vba.WritebackInterfaceLumaBufferSize + + mode_lib->vba.WritebackInterfaceChromaBufferSize) + / mode_lib->vba.WritebackLatency) { + mode_lib->vba.WritebackLatencySupport = false; + } + } else { + if (locals->WriteBandwidth[k] + > 1.5 + * dml_min( + mode_lib->vba.WritebackInterfaceLumaBufferSize, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize) + / mode_lib->vba.WritebackLatency) { + mode_lib->vba.WritebackLatencySupport = false; + } + } + } + } + /*Re-ordering Buffer Support Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = + (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] + + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { + locals->ROBSupport[i] = true; + } else { + locals->ROBSupport[i] = false; + } + } + /*Writeback Mode Support Check*/ + + mode_lib->vba.TotalNumberOfActiveWriteback = 0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0) + mode_lib->vba.ActiveWritebacksPerPlane[k] = 1; + mode_lib->vba.TotalNumberOfActiveWriteback = + mode_lib->vba.TotalNumberOfActiveWriteback + + mode_lib->vba.ActiveWritebacksPerPlane[k]; + } + } + mode_lib->vba.WritebackModeSupport = true; + if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) { + mode_lib->vba.WritebackModeSupport = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.Writeback10bpc420Supported != true + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + mode_lib->vba.WritebackModeSupport = false; + } + } + /*Writeback Scale Ratio and Taps Support Check*/ + + mode_lib->vba.WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false + && (mode_lib->vba.WritebackHRatio[k] != 1.0 + || mode_lib->vba.WritebackVRatio[k] != 1.0)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio + || mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackMaxVSCLRatio + || mode_lib->vba.WritebackHRatio[k] + < mode_lib->vba.WritebackMinHSCLRatio + || mode_lib->vba.WritebackVRatio[k] + < mode_lib->vba.WritebackMinVSCLRatio + || mode_lib->vba.WritebackLumaHTaps[k] + > mode_lib->vba.WritebackMaxHSCLTaps + || mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackMaxVSCLTaps + || mode_lib->vba.WritebackHRatio[k] + > mode_lib->vba.WritebackLumaHTaps[k] + || mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackLumaVTaps[k] + || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0 + && ((mode_lib->vba.WritebackLumaHTaps[k] % 2) + == 1)) + || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32 + && (mode_lib->vba.WritebackChromaHTaps[k] + > mode_lib->vba.WritebackMaxHSCLTaps + || mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackMaxVSCLTaps + || 2.0 + * mode_lib->vba.WritebackHRatio[k] + > mode_lib->vba.WritebackChromaHTaps[k] + || 2.0 + * mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackChromaVTaps[k] + || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0 + && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (mode_lib->vba.WritebackVRatio[k] < 1.0) { + mode_lib->vba.WritebackLumaVExtra = + dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0); + } else { + mode_lib->vba.WritebackLumaVExtra = -1; + } + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32 + && mode_lib->vba.WritebackLumaVTaps[k] + > (mode_lib->vba.WritebackLineBufferLumaBufferSize + + mode_lib->vba.WritebackLineBufferChromaBufferSize) + / 3.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 + && mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackLineBufferLumaBufferSize + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 + && mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackLineBufferLumaBufferSize + * 8.0 / 10.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) { + mode_lib->vba.WritebackChromaVExtra = 0.0; + } else { + mode_lib->vba.WritebackChromaVExtra = -1; + } + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 + && mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackLineBufferChromaBufferSize + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackChromaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 + && mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackLineBufferChromaBufferSize + * 8.0 / 10.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackChromaVExtra)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + } + } + /*Maximum DISPCLK/DPPCLK Support check*/ + + mode_lib->vba.WritebackRequiredDISPCLK = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + mode_lib->vba.WritebackRequiredDISPCLK = + dml_max( + mode_lib->vba.WritebackRequiredDISPCLK, + CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.WritebackChromaLineBufferWidth)); + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.HRatio[k] > 1.0) { + locals->PSCL_FACTOR[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / dml_ceil( + mode_lib->vba.htaps[k] + / 6.0, + 1.0)); + } else { + locals->PSCL_FACTOR[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + if (locals->BytePerPixelInDETC[k] == 0.0) { + locals->PSCL_FACTOR_CHROMA[k] = 0.0; + locals->MinDPPCLKUsingSingleDPP[k] = + mode_lib->vba.PixelClock[k] + * dml_max3( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / locals->PSCL_FACTOR[k], + 1.0); + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0) + && locals->MinDPPCLKUsingSingleDPP[k] + < 2.0 * mode_lib->vba.PixelClock[k]) { + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 + * mode_lib->vba.PixelClock[k]; + } + } else { + if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) { + locals->PSCL_FACTOR_CHROMA[k] = + dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / 2.0 + / dml_ceil( + mode_lib->vba.HTAPsChroma[k] + / 6.0, + 1.0)); + } else { + locals->PSCL_FACTOR_CHROMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + locals->MinDPPCLKUsingSingleDPP[k] = + mode_lib->vba.PixelClock[k] + * dml_max5( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / locals->PSCL_FACTOR[k], + mode_lib->vba.VTAPsChroma[k] + / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k] + / 2.0), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / 4.0 + / locals->PSCL_FACTOR_CHROMA[k], + 1.0); + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0 + || mode_lib->vba.HTAPsChroma[k] > 6.0 + || mode_lib->vba.VTAPsChroma[k] > 6.0) + && locals->MinDPPCLKUsingSingleDPP[k] + < 2.0 * mode_lib->vba.PixelClock[k]) { + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 + * mode_lib->vba.PixelClock[k]; + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + Calculate256BBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), + dml_ceil(locals->BytePerPixelInDETC[k], 2.0), + &locals->Read256BlockHeightY[k], + &locals->Read256BlockHeightC[k], + &locals->Read256BlockWidthY[k], + &locals->Read256BlockWidthC[k]); + if (mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k]; + locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k]; + } else { + locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k]; + locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k]; + } + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_t + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s_x) + && mode_lib->vba.SourceScan[k] == dm_horz)) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + } else { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] + / 2.0; + } + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 + && mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] + / 2.0; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + && mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k] + / 2.0; + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + } else { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } + } + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + mode_lib->vba.MaximumSwathWidthSupport = 8192.0; + } else { + mode_lib->vba.MaximumSwathWidthSupport = 5120.0; + } + mode_lib->vba.MaximumSwathWidthInDETBuffer = + dml_min( + mode_lib->vba.MaximumSwathWidthSupport, + mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0 + / (locals->BytePerPixelInDETY[k] + * locals->MinSwathHeightY[k] + + locals->BytePerPixelInDETC[k] + / 2.0 + * locals->MinSwathHeightC[k])); + if (locals->BytePerPixelInDETC[k] == 0.0) { + mode_lib->vba.MaximumSwathWidthInLineBuffer = + mode_lib->vba.LineBufferSize + * dml_max(mode_lib->vba.HRatio[k], 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.vtaps[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k], + 1.0) + - 2, + 0.0)); + } else { + mode_lib->vba.MaximumSwathWidthInLineBuffer = + dml_min( + mode_lib->vba.LineBufferSize + * dml_max( + mode_lib->vba.HRatio[k], + 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.vtaps[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k], + 1.0) + - 2, + 0.0)), + 2.0 * mode_lib->vba.LineBufferSize + * dml_max( + mode_lib->vba.HRatio[k] + / 2.0, + 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.VTAPsChroma[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k] + / 2.0, + 1.0) + - 2, + 0.0))); + } + locals->MaximumSwathWidth[k] = dml_min( + mode_lib->vba.MaximumSwathWidthInDETBuffer, + mode_lib->vba.MaximumSwathWidthInLineBuffer); + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.MaxDispclk[i], + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.MaxDppclk[i], + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + locals->RequiredDISPCLK[i][j] = 0.0; + locals->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = + mode_lib->vba.PixelClock[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i] + && i == mode_lib->vba.soc.num_states) + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i] + && i == mode_lib->vba.soc.num_states) + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { + locals->ODMCombineEnablePerState[i][k] = false; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; + } else { + locals->ODMCombineEnablePerState[i][k] = true; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } + if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity + && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] + && locals->ODMCombineEnablePerState[i][k] == false) { + locals->NoOfDPP[i][j][k] = 1; + locals->RequiredDPPCLK[i][j][k] = + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } else { + locals->NoOfDPP[i][j][k] = 2; + locals->RequiredDPPCLK[i][j][k] = + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.PlaneRequiredDISPCLK); + if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity) + || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) { + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + locals->TotalNumberOfActiveDPP[i][j] = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + if (j == 1) { + while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP + && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) { + double BWOfNonSplitPlaneOfMaximumBandwidth; + unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; + + BWOfNonSplitPlaneOfMaximumBandwidth = 0; + NumberOfNonSplitPlaneOfMaximumBandwidth = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) { + BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k]; + NumberOfNonSplitPlaneOfMaximumBandwidth = k; + } + } + locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; + locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = + locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1; + } + } + if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) { + locals->RequiredDISPCLK[i][j] = 0.0; + locals->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->ODMCombineEnablePerState[i][k] = false; + if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) { + locals->NoOfDPP[i][j][k] = 1; + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } else { + locals->NoOfDPP[i][j][k] = 2; + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } + if (i != mode_lib->vba.soc.num_states) { + mode_lib->vba.PlaneRequiredDISPCLK = + mode_lib->vba.PixelClock[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + } else { + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.PlaneRequiredDISPCLK); + if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity + || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + locals->TotalNumberOfActiveDPP[i][j] = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.WritebackRequiredDISPCLK); + if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity + < mode_lib->vba.WritebackRequiredDISPCLK) { + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + } + /*Viewport Size Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->ViewportSizeSupport[i] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->ODMCombineEnablePerState[i][k] == true) { + if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) + > locals->MaximumSwathWidth[k]) { + locals->ViewportSizeSupport[i] = false; + } + } else { + if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { + locals->ViewportSizeSupport[i] = false; + } + } + } + } + /*Total Available Pipes Support Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP) + locals->TotalAvailablePipesSupport[i][j] = true; + else + locals->TotalAvailablePipesSupport[i][j] = false; + } + } + /*Total Available OTG Support Check*/ + + mode_lib->vba.TotalNumberOfActiveOTG = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + + 1.0; + } + } + if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) { + mode_lib->vba.NumberOfOTGSupport = true; + } else { + mode_lib->vba.NumberOfOTGSupport = false; + } + /*Display IO and DSC Support Check*/ + + mode_lib->vba.NonsupportedDSCInputBPC = false; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) { + mode_lib->vba.NonsupportedDSCInputBPC = true; + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->RequiresDSC[i][k] = 0; + locals->RequiresFEC[i][k] = 0; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.Output[k] == dm_hdmi) { + locals->RequiresDSC[i][k] = 0; + locals->RequiresFEC[i][k] = 0; + locals->OutputBppPerState[i][k] = TruncToValidBPP( + dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + } else if (mode_lib->vba.Output[k] == dm_dp + || mode_lib->vba.Output[k] == dm_edp) { + if (mode_lib->vba.Output[k] == dm_edp) { + mode_lib->vba.EffectiveFECOverhead = 0.0; + } else { + mode_lib->vba.EffectiveFECOverhead = + mode_lib->vba.FECOverhead; + } + if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; + } + if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; + } + if (mode_lib->vba.Outbpp == BPP_INVALID + && mode_lib->vba.PHYCLKPerState[i] + >= 810.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = + mode_lib->vba.Outbpp; + } + } + } else { + locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE; + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->DIOSupport[i] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->OutputBppPerState[i][k] == BPP_INVALID + || (mode_lib->vba.OutputFormat[k] == dm_420 + && mode_lib->vba.Interlace[k] == true + && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)) { + locals->DIOSupport[i] = false; + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->DSCCLKRequiredMoreThanSupported[i] = false; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if ((mode_lib->vba.Output[k] == dm_dp + || mode_lib->vba.Output[k] == dm_edp)) { + if (mode_lib->vba.OutputFormat[k] == dm_420 + || mode_lib->vba.OutputFormat[k] + == dm_n422) { + mode_lib->vba.DSCFormatFactor = 2; + } else { + mode_lib->vba.DSCFormatFactor = 1; + } + if (locals->RequiresDSC[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] + == true) { + if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { + locals->DSCCLKRequiredMoreThanSupported[i] = + true; + } + } else { + if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { + locals->DSCCLKRequiredMoreThanSupported[i] = + true; + } + } + } + } + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->NotEnoughDSCUnits[i] = false; + mode_lib->vba.TotalDSCUnitsRequired = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->RequiresDSC[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == true) { + mode_lib->vba.TotalDSCUnitsRequired = + mode_lib->vba.TotalDSCUnitsRequired + 2.0; + } else { + mode_lib->vba.TotalDSCUnitsRequired = + mode_lib->vba.TotalDSCUnitsRequired + 1.0; + } + } + } + if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) { + locals->NotEnoughDSCUnits[i] = true; + } + } + /*DSC Delay per state*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] != k) { + mode_lib->vba.slices = 0; + } else if (locals->RequiresDSC[i][k] == 0 + || locals->RequiresDSC[i][k] == false) { + mode_lib->vba.slices = 0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) { + mode_lib->vba.slices = dml_ceil( + mode_lib->vba.PixelClockBackEnd[k] / 400.0, + 4.0); + } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) { + mode_lib->vba.slices = 8.0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) { + mode_lib->vba.slices = 4.0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) { + mode_lib->vba.slices = 2.0; + } else { + mode_lib->vba.slices = 1.0; + } + if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE + || locals->OutputBppPerState[i][k] == BPP_INVALID) { + mode_lib->vba.bpp = 0.0; + } else { + mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; + } + if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { + if (locals->ODMCombineEnablePerState[i][k] == false) { + locals->DSCDelayPerState[i][k] = + dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.bpp, + dml_ceil( + mode_lib->vba.HActive[k] + / mode_lib->vba.slices, + 1.0), + mode_lib->vba.slices, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k]); + } else { + locals->DSCDelayPerState[i][k] = + 2.0 * (dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.bpp, + dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0), + mode_lib->vba.slices / 2, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay(mode_lib->vba.OutputFormat[k])); + } + locals->DSCDelayPerState[i][k] = + locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k]; + } else { + locals->DSCDelayPerState[i][k] = 0.0; + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) { + if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true) + locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m]; + } + } + } + } + + //Prefetch Check + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->ODMCombineEnablePerState[i][k] == true) + locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k])); + else + locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; + locals->SwathWidthGranularityY = 256 / dml_ceil(locals->BytePerPixelInDETY[k], 1) / locals->MaxSwathHeightY[k]; + locals->RoundedUpMaxSwathSizeBytesY = (dml_ceil(locals->SwathWidthYPerState[i][j][k] - 1, locals->SwathWidthGranularityY) + + locals->SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k]; + if (locals->SourcePixelFormat[k] == dm_420_10) { + locals->RoundedUpMaxSwathSizeBytesY = dml_ceil(locals->RoundedUpMaxSwathSizeBytesY, 256) + 256; + } + if (locals->MaxSwathHeightC[k] > 0) { + locals->SwathWidthGranularityC = 256 / dml_ceil(locals->BytePerPixelInDETC[k], 2) / locals->MaxSwathHeightC[k]; + + locals->RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYPerState[i][j][k] / 2 - 1, locals->SwathWidthGranularityC) + + locals->SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k]; + } + if (locals->SourcePixelFormat[k] == dm_420_10) { + locals->RoundedUpMaxSwathSizeBytesC = dml_ceil(locals->RoundedUpMaxSwathSizeBytesC, 256) + 256; + } else { + locals->RoundedUpMaxSwathSizeBytesC = 0; + } + + if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte * 1024 / 2) { + locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k]; + locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k]; + } else { + locals->SwathHeightYPerState[i][j][k] = locals->MinSwathHeightY[k]; + locals->SwathHeightCPerState[i][j][k] = locals->MinSwathHeightC[k]; + } + + if (locals->BytePerPixelInDETC[k] == 0) { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = 0; + } else if (locals->SwathHeightYPerState[i][j][k] <= locals->SwathHeightCPerState[i][j][k]) { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETY[k] / + locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETC[k] / (locals->SwathWidthYPerState[i][j][k] / 2); + } else { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 * 2 / 3 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 3 / locals->BytePerPixelInDETY[k] / (locals->SwathWidthYPerState[i][j][k] / 2); + } + + locals->EffectiveLBLatencyHidingSourceLinesLuma = dml_min(locals->MaxLineBufferLines, + dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k] / (locals->SwathWidthYPerState[i][j][k] + / dml_max(locals->HRatio[k], 1)), 1)) - (locals->vtaps[k] - 1); + + locals->EffectiveLBLatencyHidingSourceLinesChroma = dml_min(locals->MaxLineBufferLines, + dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k] + / (locals->SwathWidthYPerState[i][j][k] / 2 + / dml_max(locals->HRatio[k] / 2, 1)), 1)) - (locals->VTAPsChroma[k] - 1); + + locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min( + locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] * + locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i], + locals->EffectiveLBLatencyHidingSourceLinesLuma), + locals->SwathHeightYPerState[i][j][k]); + + locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( + locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i], + locals->EffectiveLBLatencyHidingSourceLinesChroma), + locals->SwathHeightCPerState[i][j][k]); + + if (locals->BytePerPixelInDETC[k] == 0) { + locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) + / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]); + } else { + locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( + locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) + / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]), + locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) - + locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 * + dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k])); + } + } + } + } + + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->UrgentLatencySupport[i][j] = true; + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + if (locals->UrgentLatencySupportUsPerState[i][j][k] < locals->UrgentLatency) + locals->UrgentLatencySupport[i][j] = false; + } + } + } + + + /*Prefetch Check*/ + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->TotalNumberOfDCCActiveDPP[i][j] = 0; + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + if (locals->DCCEnable[k] == true) { + locals->TotalNumberOfDCCActiveDPP[i][j] = + locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + } + } + } + } + + CalculateMinAndMaxPrefetchMode(locals->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &locals->MinPrefetchMode, &locals->MaxPrefetchMode); + + locals->MaxTotalVActiveRDBandwidth = 0; + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + locals->MaxTotalVActiveRDBandwidth = locals->MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k]; + } + + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k]; + locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k]; + locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k]; + locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k]; + locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k]; + mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.PixelClock[k] / 16.0); + if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { + if (mode_lib->vba.VRatio[k] <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 64.0 + * mode_lib->vba.HRatio[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 64.0 + * mode_lib->vba.PSCL_FACTOR[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + } else { + if (mode_lib->vba.VRatio[k] <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 32.0 + * mode_lib->vba.HRatio[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 32.0 + * mode_lib->vba.PSCL_FACTOR[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETC[k], + 2.0) + / 32.0 + * mode_lib->vba.HRatio[k] + / 2.0 + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETC[k], + 2.0) + / 32.0 + * mode_lib->vba.PSCL_FACTOR_CHROMA[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.Read256BlockHeightY[k], + mode_lib->vba.Read256BlockWidthY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelInDETY[k], 1.0), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k], + mode_lib->vba.ViewportHeight[k], + mode_lib->vba.SwathWidthYPerState[i][j][k], + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchY[k], + mode_lib->vba.DCCMetaPitchY[k], + &mode_lib->vba.MacroTileWidthY[k], + &mode_lib->vba.MetaRowBytesY, + &mode_lib->vba.DPTEBytesPerRowY, + &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k], + &mode_lib->vba.dpte_row_height[k], + &mode_lib->vba.meta_row_height[k]); + mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.vtaps[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightYPerState[i][j][k], + mode_lib->vba.ViewportYStartY[k], + &mode_lib->vba.PrefillY[k], + &mode_lib->vba.MaxNumSwY[k]); + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.Read256BlockHeightY[k], + mode_lib->vba.Read256BlockWidthY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelInDETC[k], 2.0), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k] / 2.0, + mode_lib->vba.ViewportHeight[k] / 2.0, + mode_lib->vba.SwathWidthYPerState[i][j][k] / 2.0, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchC[k], + 0.0, + &mode_lib->vba.MacroTileWidthC[k], + &mode_lib->vba.MetaRowBytesC, + &mode_lib->vba.DPTEBytesPerRowC, + &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k], + &mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_height_chroma[k]); + mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k] / 2.0, + mode_lib->vba.VTAPsChroma[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightCPerState[i][j][k], + mode_lib->vba.ViewportYStartC[k], + &mode_lib->vba.PrefillC[k], + &mode_lib->vba.MaxNumSwC[k]); + } else { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; + mode_lib->vba.MetaRowBytesC = 0.0; + mode_lib->vba.DPTEBytesPerRowC = 0.0; + locals->PrefetchLinesC[k] = 0.0; + locals->PTEBufferSizeNotExceededC[i][j][k] = true; + locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; + } + locals->PDEAndMetaPTEBytesPerFrame[k] = + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; + locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + + CalculateActiveRowBandwidth( + mode_lib->vba.GPUVMEnable, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.MetaRowBytesY, + mode_lib->vba.MetaRowBytesC, + mode_lib->vba.meta_row_height[k], + mode_lib->vba.meta_row_height_chroma[k], + mode_lib->vba.DPTEBytesPerRowY, + mode_lib->vba.DPTEBytesPerRowC, + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_bw[k], + &mode_lib->vba.dpte_row_bw[k], + &mode_lib->vba.qual_row_bw[k]); + } + mode_lib->vba.ExtraLatency = + mode_lib->vba.UrgentRoundTripAndOutOfOrderLatencyPerState[i] + + (mode_lib->vba.TotalNumberOfActiveDPP[i][j] + * mode_lib->vba.PixelChunkSizeInKByte + + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] + * mode_lib->vba.MetaChunkSize) + * 1024.0 + / mode_lib->vba.ReturnBWPerState[i]; + if (mode_lib->vba.GPUVMEnable == true) { + mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency + + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + * mode_lib->vba.PTEGroupSize + / mode_lib->vba.ReturnBWPerState[i]; + } + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j]; + } else { + locals->WritebackDelay[i][k] = 0.0; + } + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[m] == k + && mode_lib->vba.WritebackEnable[m] + == true) { + locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k], + mode_lib->vba.WritebackLatency + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[m], + mode_lib->vba.WritebackHRatio[m], + mode_lib->vba.WritebackVRatio[m], + mode_lib->vba.WritebackLumaHTaps[m], + mode_lib->vba.WritebackLumaVTaps[m], + mode_lib->vba.WritebackChromaHTaps[m], + mode_lib->vba.WritebackChromaVTaps[m], + mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]); + } + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[k] == m) { + locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m]; + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m < locals->NumberOfCursors[k]; m++) + locals->cursor_bw[k] = locals->NumberOfCursors[k] * locals->CursorWidth[k][m] * locals->CursorBPP[k][m] + / 8 / (locals->HTotal[k] / locals->PixelClock[k]) * locals->VRatio[k]; + } + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); + } + + mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode; + do { + mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode; + mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1; + + mode_lib->vba.TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[i][j], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + + if (mode_lib->vba.XFCEnabled[k] == true) { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = + CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + locals->SwathWidthYPerState[i][j][k], + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TimeCalc, + mode_lib->vba.TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + } else { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; + } + + CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth, + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k], + mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal, + mode_lib->vba.SwathWidthYPerState[i][j][k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.HTotal[k], + mode_lib->vba.SwathWidthYSingleDPP[k], mode_lib->vba.BytePerPixelInDETY[k], mode_lib->vba.BytePerPixelInDETC[k], mode_lib->vba.SwathHeightYThisState[k], mode_lib->vba.SwathHeightCThisState[k], mode_lib->vba.Interlace[k], mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + &mode_lib->vba.DSTXAfterScaler[k], &mode_lib->vba.DSTYAfterScaler[k]); + + mode_lib->vba.IsErrorResult[i][j][k] = + CalculatePrefetchSchedule( + mode_lib, + mode_lib->vba.RequiredDPPCLK[i][j][k], + mode_lib->vba.RequiredDISPCLK[i][j], + mode_lib->vba.PixelClock[k], + mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.NoOfDPP[i][j][k], + mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.MaxInterDCNTileRepeaters, + mode_lib->vba.MaximumVStartup[k], + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.DynamicMetadataEnable[k], + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], + mode_lib->vba.DynamicMetadataTransmittedBytes[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.ExtraLatency, + mode_lib->vba.TimeCalc, + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], + mode_lib->vba.MetaRowBytes[k], + mode_lib->vba.DPTEBytesPerRow[k], + mode_lib->vba.PrefetchLinesY[k], + mode_lib->vba.SwathWidthYPerState[i][j][k], + mode_lib->vba.BytePerPixelInDETY[k], + mode_lib->vba.PrefillY[k], + mode_lib->vba.MaxNumSwY[k], + mode_lib->vba.PrefetchLinesC[k], + mode_lib->vba.BytePerPixelInDETC[k], + mode_lib->vba.PrefillC[k], + mode_lib->vba.MaxNumSwC[k], + mode_lib->vba.SwathHeightYPerState[i][j][k], + mode_lib->vba.SwathHeightCPerState[i][j][k], + mode_lib->vba.TWait, + mode_lib->vba.XFCEnabled[k], + mode_lib->vba.XFCRemoteSurfaceFlipDelay, + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.DSTXAfterScaler[k], + mode_lib->vba.DSTYAfterScaler[k], + &mode_lib->vba.LineTimesForPrefetch[k], + &mode_lib->vba.PrefetchBW[k], + &mode_lib->vba.LinesForMetaPTE[k], + &mode_lib->vba.LinesForMetaAndDPTERow[k], + &mode_lib->vba.VRatioPreY[i][j][k], + &mode_lib->vba.VRatioPreC[i][j][k], + &mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k], + &mode_lib->vba.Tno_bw[k], + &mode_lib->vba.VUpdateOffsetPix[k], + &mode_lib->vba.VUpdateWidthPix[k], + &mode_lib->vba.VReadyOffsetPix[k]); + } + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0; + mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; + locals->prefetch_vm_bw_valid = true; + locals->prefetch_row_bw_valid = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0) + locals->prefetch_vm_bw[k] = 0; + else if (locals->LinesForMetaPTE[k] > 0) + locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k] + / (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]); + else { + locals->prefetch_vm_bw[k] = 0; + locals->prefetch_vm_bw_valid = false; + } + if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0) + locals->prefetch_row_bw[k] = 0; + else if (locals->LinesForMetaAndDPTERow[k] > 0) + locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]) + / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]); + else { + locals->prefetch_row_bw[k] = 0; + locals->prefetch_row_bw_valid = false; + } + + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch + + mode_lib->vba.cursor_bw[k] + mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]; + mode_lib->vba.MaximumReadBandwidthWithPrefetch = + mode_lib->vba.MaximumReadBandwidthWithPrefetch + + mode_lib->vba.cursor_bw[k] + + dml_max3( + mode_lib->vba.prefetch_vm_bw[k], + mode_lib->vba.prefetch_row_bw[k], + dml_max(mode_lib->vba.ReadBandwidth[k], + mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]) + + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]); + } + locals->BandwidthWithoutPrefetchSupported[i] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) { + locals->BandwidthWithoutPrefetchSupported[i] = false; + } + + locals->PrefetchSupported[i][j] = true; + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) { + locals->PrefetchSupported[i][j] = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->LineTimesForPrefetch[k] < 2.0 + || locals->LinesForMetaPTE[k] >= 8.0 + || locals->LinesForMetaAndDPTERow[k] >= 16.0 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { + locals->PrefetchSupported[i][j] = false; + } + } + locals->VRatioInPrefetchSupported[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->VRatioPreY[i][j][k] > 4.0 + || locals->VRatioPreC[i][j][k] > 4.0 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { + locals->VRatioInPrefetchSupported[i][j] = false; + } + } + } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) + && mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode); + + if (mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.ReturnBWPerState[i]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.BandwidthAvailableForImmediateFlip + - mode_lib->vba.cursor_bw[k] + - dml_max( + mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.qual_row_bw[k], + mode_lib->vba.PrefetchBW[k]); + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.ImmediateFlipBytes[k] = 0.0; + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.ImmediateFlipBytes[k] = + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] + + mode_lib->vba.MetaRowBytes[k] + + mode_lib->vba.DPTEBytesPerRow[k]; + } + } + mode_lib->vba.TotImmediateFlipBytes = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + mode_lib->vba.ImmediateFlipBytes[k]; + } + } + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + CalculateFlipSchedule( + mode_lib, + mode_lib->vba.ExtraLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.ImmediateFlipBytes[k], + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.Tno_bw[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], + mode_lib->vba.MetaRowBytes[k], + mode_lib->vba.DPTEBytesPerRow[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.meta_row_height[k], + mode_lib->vba.qual_row_bw[k], + &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], + &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], + &mode_lib->vba.final_flip_bw[k], + &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); + } + mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.total_dcn_read_bw_with_flip = + mode_lib->vba.total_dcn_read_bw_with_flip + + mode_lib->vba.cursor_bw[k] + + dml_max3( + mode_lib->vba.prefetch_vm_bw[k], + mode_lib->vba.prefetch_row_bw[k], + mode_lib->vba.final_flip_bw[k] + + dml_max( + mode_lib->vba.ReadBandwidth[k], + mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k])); + } + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true; + if (mode_lib->vba.total_dcn_read_bw_with_flip + > mode_lib->vba.ReturnBWPerState[i]) { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + } + } else { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + } + } + + /*Vertical Active BW support*/ + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth * + mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) * + mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100; + if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i]) + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true; + else + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false; + } + + /*PTE Buffer Size Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->PTEBufferSizeNotExceeded[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->PTEBufferSizeNotExceededY[i][j][k] == false + || locals->PTEBufferSizeNotExceededC[i][j][k] == false) { + locals->PTEBufferSizeNotExceeded[i][j] = false; + } + } + } + } + /*Cursor Support Check*/ + mode_lib->vba.CursorSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (j = 0; j < 2; j++) { + if (mode_lib->vba.CursorWidth[k][j] > 0.0) { + if (dml_floor( + dml_floor( + mode_lib->vba.CursorBufferSize + - mode_lib->vba.CursorChunkSize, + mode_lib->vba.CursorChunkSize) * 1024.0 + / (mode_lib->vba.CursorWidth[k][j] + * mode_lib->vba.CursorBPP[k][j] + / 8.0), + 1.0) + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k] < mode_lib->vba.UrgentLatencyPixelDataOnly + || (mode_lib->vba.CursorBPP[k][j] == 64.0 + && mode_lib->vba.Cursor64BppSupport == false)) { + mode_lib->vba.CursorSupport = false; + } + } + } + } + /*Valid Pitch Check*/ + + mode_lib->vba.PitchSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->AlignedYPitch[k] = dml_ceil( + dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]), + locals->MacroTileWidthY[k]); + if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) { + mode_lib->vba.PitchSupport = false; + } + if (mode_lib->vba.DCCEnable[k] == true) { + locals->AlignedDCCMetaPitch[k] = dml_ceil( + dml_max( + mode_lib->vba.DCCMetaPitchY[k], + mode_lib->vba.ViewportWidth[k]), + 64.0 * locals->Read256BlockWidthY[k]); + } else { + locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k]; + } + if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) { + mode_lib->vba.PitchSupport = false; + } + if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) { + locals->AlignedCPitch[k] = dml_ceil( + dml_max( + mode_lib->vba.PitchC[k], + mode_lib->vba.ViewportWidth[k] / 2.0), + locals->MacroTileWidthC[k]); + } else { + locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k]; + } + if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) { + mode_lib->vba.PitchSupport = false; + } + } + /*Mode Support, Voltage State and SOC Configuration*/ + + for (i = mode_lib->vba.soc.num_states; i >= 0; i--) { + for (j = 0; j < 2; j++) { + enum dm_validation_status status = DML_VALIDATION_OK; + + if (mode_lib->vba.ScaleRatioAndTapsSupport != true) { + status = DML_FAIL_SCALE_RATIO_TAP; + } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { + status = DML_FAIL_SOURCE_PIXEL_FORMAT; + } else if (locals->ViewportSizeSupport[i] != true) { + status = DML_FAIL_VIEWPORT_SIZE; + } else if (locals->DIOSupport[i] != true) { + status = DML_FAIL_DIO_SUPPORT; + } else if (locals->NotEnoughDSCUnits[i] != false) { + status = DML_FAIL_NOT_ENOUGH_DSC; + } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) { + status = DML_FAIL_DSC_CLK_REQUIRED; + } else if (locals->UrgentLatencySupport[i][j] != true) { + status = DML_FAIL_URGENT_LATENCY; + } else if (locals->ROBSupport[i] != true) { + status = DML_FAIL_REORDERING_BUFFER; + } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { + status = DML_FAIL_DISPCLK_DPPCLK; + } else if (locals->TotalAvailablePipesSupport[i][j] != true) { + status = DML_FAIL_TOTAL_AVAILABLE_PIPES; + } else if (mode_lib->vba.NumberOfOTGSupport != true) { + status = DML_FAIL_NUM_OTG; + } else if (mode_lib->vba.WritebackModeSupport != true) { + status = DML_FAIL_WRITEBACK_MODE; + } else if (mode_lib->vba.WritebackLatencySupport != true) { + status = DML_FAIL_WRITEBACK_LATENCY; + } else if (mode_lib->vba.WritebackScaleRatioAndTapsSupport != true) { + status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP; + } else if (mode_lib->vba.CursorSupport != true) { + status = DML_FAIL_CURSOR_SUPPORT; + } else if (mode_lib->vba.PitchSupport != true) { + status = DML_FAIL_PITCH_SUPPORT; + } else if (locals->PrefetchSupported[i][j] != true) { + status = DML_FAIL_PREFETCH_SUPPORT; + } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + status = DML_FAIL_TOTAL_V_ACTIVE_BW; + } else if (locals->VRatioInPrefetchSupported[i][j] != true) { + status = DML_FAIL_V_RATIO_PREFETCH; + } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) { + status = DML_FAIL_PTE_BUFFER_SIZE; + } else if (mode_lib->vba.NonsupportedDSCInputBPC != false) { + status = DML_FAIL_DSC_INPUT_BPC; + } + + if (status == DML_VALIDATION_OK) { + locals->ModeSupport[i][j] = true; + } else { + locals->ModeSupport[i][j] = false; + } + locals->ValidationStatus[i] = status; + } + } + { + unsigned int MaximumMPCCombine = 0; + mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1; + for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) { + if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) { + mode_lib->vba.VoltageLevel = i; + if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false + || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible)) { + MaximumMPCCombine = 1; + } else { + MaximumMPCCombine = 0; + } + break; + } + } + mode_lib->vba.ImmediateFlipSupport = + locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + } + mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + mode_lib->vba.maxMpcComb = MaximumMPCCombine; + } + mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.ODMCombineEnabled[k] = + locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k]; + } else { + mode_lib->vba.ODMCombineEnabled[k] = 0; + } + mode_lib->vba.DSCEnabled[k] = + locals->RequiresDSC[mode_lib->vba.VoltageLevel][k]; + mode_lib->vba.OutputBpp[k] = + locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k]; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h new file mode 100644 index 000000000000..a989d3ca1e99 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h @@ -0,0 +1,32 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN20V2_DISPLAY_MODE_VBA_H_ +#define _DCN20V2_DISPLAY_MODE_VBA_H_ + +void dml20v2_recalculate(struct display_mode_lib *mode_lib); +void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c new file mode 100644 index 000000000000..ed8bf5f723c9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -0,0 +1,1701 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "../display_mode_lib.h" +#include "../display_mode_vba.h" +#include "display_rq_dlg_calc_20v2.h" + +// Function: dml20v2_rq_dlg_get_rq_params +// Calculate requestor related parameters that register definition agnostic +// (i.e. this layer does try to separate real values from register definition) +// Input: +// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) +// Output: +// rq_param - values that can be used to setup RQ (e.g. swath_height, plane1_addr, etc.) +// +static void dml20v2_rq_dlg_get_rq_params( + struct display_mode_lib *mode_lib, + display_rq_params_st * rq_param, + const display_pipe_source_params_st pipe_src_param); + +// Function: dml20v2_rq_dlg_get_dlg_params +// Calculate deadline related parameters +// +static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + display_dlg_regs_st *disp_dlg_regs, + display_ttu_regs_st *disp_ttu_regs, + const display_rq_dlg_params_st rq_dlg_param, + const display_dlg_sys_params_st dlg_sys_param, + const bool cstate_en, + const bool pstate_en); +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, + double *refcyc_per_req_delivery_pre_cur, + double *refcyc_per_req_delivery_cur, + double refclk_freq_in_mhz, + double ref_freq_to_pix_freq, + double hscale_pixel_rate_l, + double hscl_ratio, + double vratio_pre_l, + double vratio_l, + unsigned int cur_width, + enum cursor_bpp cur_bpp); + +#include "../dml_inline_defs.h" + +static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) +{ + unsigned int ret_val = 0; + + if (source_format == dm_444_16) { + if (!is_chroma) + ret_val = 2; + } else if (source_format == dm_444_32) { + if (!is_chroma) + ret_val = 4; + } else if (source_format == dm_444_64) { + if (!is_chroma) + ret_val = 8; + } else if (source_format == dm_420_8) { + if (is_chroma) + ret_val = 2; + else + ret_val = 1; + } else if (source_format == dm_420_10) { + if (is_chroma) + ret_val = 4; + else + ret_val = 2; + } else if (source_format == dm_444_8) { + ret_val = 1; + } + return ret_val; +} + +static bool is_dual_plane(enum source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dm_420_8) || (source_format == dm_420_10)) + ret_val = 1; + + return ret_val; +} + +static double get_refcyc_per_delivery(struct display_mode_lib *mode_lib, + double refclk_freq_in_mhz, + double pclk_freq_in_mhz, + bool odm_combine, + unsigned int recout_width, + unsigned int hactive, + double vratio, + double hscale_pixel_rate, + unsigned int delivery_width, + unsigned int req_per_swath_ub) +{ + double refcyc_per_delivery = 0.0; + + if (vratio <= 1.0) { + if (odm_combine) + refcyc_per_delivery = (double) refclk_freq_in_mhz + * dml_min((double) recout_width, (double) hactive / 2.0) + / pclk_freq_in_mhz / (double) req_per_swath_ub; + else + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width + / pclk_freq_in_mhz / (double) req_per_swath_ub; + } else { + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width + / (double) hscale_pixel_rate / (double) req_per_swath_ub; + } + + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width); + dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio); + dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub); + dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery); + + return refcyc_per_delivery; + +} + +static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) +{ + if (tile_size == dm_256k_tile) + return (256 * 1024); + else if (tile_size == dm_64k_tile) + return (64 * 1024); + else + return (4 * 1024); +} + +static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib, + display_data_rq_regs_st *rq_regs, + const display_data_rq_sizing_params_st rq_sizing) +{ + dml_print("DML_DLG: %s: rq_sizing param\n", __func__); + print__data_rq_sizing_params_st(mode_lib, rq_sizing); + + rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10; + + if (rq_sizing.min_chunk_bytes == 0) + rq_regs->min_chunk_size = 0; + else + rq_regs->min_chunk_size = dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1; + + rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10; + if (rq_sizing.min_meta_chunk_bytes == 0) + rq_regs->min_meta_chunk_size = 0; + else + rq_regs->min_meta_chunk_size = dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1; + + rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6; + rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6; +} + +static void extract_rq_regs(struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_rq_params_st rq_param) +{ + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + unsigned int detile_buf_plane1_addr = 0; + + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l); + + rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_l.dpte_row_height), + 1) - 3; + + if (rq_param.yuv420) { + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c); + rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_c.dpte_row_height), + 1) - 3; + } + + rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height); + rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height); + + // FIXME: take the max between luma, chroma chunk size? + // okay for now, as we are setting chunk_bytes to 8kb anyways + if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + + if (rq_param.yuv420) { + if ((double) rq_param.misc.rq_l.stored_swath_bytes + / (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5) { + detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); // half to chroma + } else { + detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), + 256, + 0) / 64.0; // 2/3 to chroma + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; +} + +static void handle_det_buf_split(struct display_mode_lib *mode_lib, + display_rq_params_st *rq_param, + const display_pipe_source_params_st pipe_src_param) +{ + unsigned int total_swath_bytes = 0; + unsigned int swath_bytes_l = 0; + unsigned int swath_bytes_c = 0; + unsigned int full_swath_bytes_packed_l = 0; + unsigned int full_swath_bytes_packed_c = 0; + bool req128_l = 0; + bool req128_c = 0; + bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear); + bool surf_vert = (pipe_src_param.source_scan == dm_vert); + unsigned int log2_swath_height_l = 0; + unsigned int log2_swath_height_c = 0; + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + + full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes; + full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes; + + if (rq_param->yuv420_10bpc) { + full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2 / 3, + 256, + 1) + 256; + full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2 / 3, + 256, + 1) + 256; + } + + if (rq_param->yuv420) { + total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; + + if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request + req128_l = 0; + req128_c = 0; + swath_bytes_l = full_swath_bytes_packed_l; + swath_bytes_c = full_swath_bytes_packed_c; + } else { //128b request (for luma only for yuv420 8bpc) + req128_l = 1; + req128_c = 0; + swath_bytes_l = full_swath_bytes_packed_l / 2; + swath_bytes_c = full_swath_bytes_packed_c; + } + // Note: assumption, the config that pass in will fit into + // the detiled buffer. + } else { + total_swath_bytes = 2 * full_swath_bytes_packed_l; + + if (total_swath_bytes <= detile_buf_size_in_bytes) + req128_l = 0; + else + req128_l = 1; + + swath_bytes_l = total_swath_bytes; + swath_bytes_c = 0; + } + rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l; + rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c; + + if (surf_linear) { + log2_swath_height_l = 0; + log2_swath_height_c = 0; + } else if (!surf_vert) { + log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l; + log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c; + } else { + log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l; + log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c; + } + rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; + rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c; + + dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l); + dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c); + dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", + __func__, + full_swath_bytes_packed_l); + dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", + __func__, + full_swath_bytes_packed_c); +} + +static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + display_data_rq_sizing_params_st *rq_sizing_param, + unsigned int vp_width, + unsigned int vp_height, + unsigned int data_pitch, + unsigned int meta_pitch, + unsigned int source_format, + unsigned int tiling, + unsigned int macro_tile_size, + unsigned int source_scan, + unsigned int is_chroma) +{ + bool surf_linear = (tiling == dm_sw_linear); + bool surf_vert = (source_scan == dm_vert); + + unsigned int bytes_per_element; + unsigned int bytes_per_element_y = get_bytes_per_element((enum source_format_class)(source_format), + false); + unsigned int bytes_per_element_c = get_bytes_per_element((enum source_format_class)(source_format), + true); + + unsigned int blk256_width = 0; + unsigned int blk256_height = 0; + + unsigned int blk256_width_y = 0; + unsigned int blk256_height_y = 0; + unsigned int blk256_width_c = 0; + unsigned int blk256_height_c = 0; + unsigned int log2_bytes_per_element; + unsigned int log2_blk256_width; + unsigned int log2_blk256_height; + unsigned int blk_bytes; + unsigned int log2_blk_bytes; + unsigned int log2_blk_height; + unsigned int log2_blk_width; + unsigned int log2_meta_req_bytes; + unsigned int log2_meta_req_height; + unsigned int log2_meta_req_width; + unsigned int meta_req_width; + unsigned int meta_req_height; + unsigned int log2_meta_row_height; + unsigned int meta_row_width_ub; + unsigned int log2_meta_chunk_bytes; + unsigned int log2_meta_chunk_height; + + //full sized meta chunk width in unit of data elements + unsigned int log2_meta_chunk_width; + unsigned int log2_min_meta_chunk_bytes; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int meta_blk_bytes; + unsigned int meta_blk_height; + unsigned int meta_blk_width; + unsigned int meta_surface_bytes; + unsigned int vmpg_bytes; + unsigned int meta_pte_req_per_frame_ub; + unsigned int meta_pte_bytes_per_frame_ub; + const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); + const unsigned int dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; + const unsigned int pde_proc_buffer_size_64k_reqs = + mode_lib->ip.pde_proc_buffer_size_64k_reqs; + + unsigned int log2_vmpg_height = 0; + unsigned int log2_vmpg_width = 0; + unsigned int log2_dpte_req_height_ptes = 0; + unsigned int log2_dpte_req_height = 0; + unsigned int log2_dpte_req_width = 0; + unsigned int log2_dpte_row_height_linear = 0; + unsigned int log2_dpte_row_height = 0; + unsigned int log2_dpte_group_width = 0; + unsigned int dpte_row_width_ub = 0; + unsigned int dpte_req_height = 0; + unsigned int dpte_req_width = 0; + unsigned int dpte_group_width = 0; + unsigned int log2_dpte_group_bytes = 0; + unsigned int log2_dpte_group_length = 0; + unsigned int pde_buf_entries; + bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10); + + Calculate256BBlockSizes((enum source_format_class)(source_format), + (enum dm_swizzle_mode)(tiling), + bytes_per_element_y, + bytes_per_element_c, + &blk256_height_y, + &blk256_height_c, + &blk256_width_y, + &blk256_width_c); + + if (!is_chroma) { + blk256_width = blk256_width_y; + blk256_height = blk256_height_y; + bytes_per_element = bytes_per_element_y; + } else { + blk256_width = blk256_width_c; + blk256_height = blk256_height_c; + bytes_per_element = bytes_per_element_c; + } + + log2_bytes_per_element = dml_log2(bytes_per_element); + + dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear); + dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert); + dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width); + dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height); + + log2_blk256_width = dml_log2((double) blk256_width); + log2_blk256_height = dml_log2((double) blk256_height); + blk_bytes = surf_linear ? + 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); + log2_blk_bytes = dml_log2((double) blk_bytes); + log2_blk_height = 0; + log2_blk_width = 0; + + // remember log rule + // "+" in log is multiply + // "-" in log is divide + // "/2" is like square root + // blk is vertical biased + if (tiling != dm_sw_linear) + log2_blk_height = log2_blk256_height + + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); + else + log2_blk_height = 0; // blk height of 1 + + log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; + + if (!surf_vert) { + rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1) + + blk256_width; + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width; + } else { + rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_height - 1, blk256_height, 1) + + blk256_height; + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height; + } + + if (!surf_vert) + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height + * bytes_per_element; + else + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width + * bytes_per_element; + + rq_misc_param->blk256_height = blk256_height; + rq_misc_param->blk256_width = blk256_width; + + // ------- + // meta + // ------- + log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element + + // each 64b meta request for dcn is 8x8 meta elements and + // a meta element covers one 256b block of the the data surface. + log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 + log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element + - log2_meta_req_height; + meta_req_width = 1 << log2_meta_req_width; + meta_req_height = 1 << log2_meta_req_height; + log2_meta_row_height = 0; + meta_row_width_ub = 0; + + // the dimensions of a meta row are meta_row_width x meta_row_height in elements. + // calculate upper bound of the meta_row_width + if (!surf_vert) { + log2_meta_row_height = log2_meta_req_height; + meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) + + meta_req_width; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width; + } else { + log2_meta_row_height = log2_meta_req_width; + meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) + + meta_req_height; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height; + } + rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; + + rq_dlg_param->meta_row_height = 1 << log2_meta_row_height; + + log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes); + log2_meta_chunk_height = log2_meta_row_height; + + //full sized meta chunk width in unit of data elements + log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element + - log2_meta_chunk_height; + log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes); + min_meta_chunk_width = 1 + << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element + - log2_meta_chunk_height); + meta_chunk_width = 1 << log2_meta_chunk_width; + meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width); + meta_row_remainder = meta_row_width_ub % meta_chunk_width; + meta_chunk_threshold = 0; + meta_blk_bytes = 4096; + meta_blk_height = blk256_height * 64; + meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height; + meta_surface_bytes = meta_pitch + * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height) + * bytes_per_element / 256; + vmpg_bytes = mode_lib->soc.vmm_page_size_bytes; + meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes, + 8 * vmpg_bytes, + 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes); + meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request + rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub; + + dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height); + dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width); + dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes); + dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", + __func__, + meta_pte_req_per_frame_ub); + dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", + __func__, + meta_pte_bytes_per_frame_ub); + + if (!surf_vert) + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width; + else + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height; + + if (meta_row_remainder <= meta_chunk_threshold) + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + else + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + + // ------ + // dpte + // ------ + if (surf_linear) { + log2_vmpg_height = 0; // one line high + } else { + log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; + } + log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; + + // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. + if (surf_linear) { //one 64B PTE request returns 8 PTEs + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_vmpg_width + 3; + log2_dpte_req_height = 0; + } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size + //one 64B req gives 8x1 PTEs for 4KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB + //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB + log2_dpte_req_height_ptes = 4; + log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width + log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height + } else { //64KB page size and must 64KB tile block + //one 64B req gives 8x1 PTEs for 64KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } + + // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height + // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent + // That depends on the pte shape (i.e. 8x1, 4x2, 2x4) + //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; + //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; + dpte_req_height = 1 << log2_dpte_req_height; + dpte_req_width = 1 << log2_dpte_req_width; + + // calculate pitch dpte row buffer can hold + // round the result down to a power of two. + pde_buf_entries = yuv420 ? (pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs; + if (surf_linear) { + unsigned int dpte_row_height; + + log2_dpte_row_height_linear = dml_floor(dml_log2(dml_min(64 * 1024 * pde_buf_entries + / bytes_per_element, + dpte_buf_in_pte_reqs + * dpte_req_width) + / data_pitch), + 1); + + ASSERT(log2_dpte_row_height_linear >= 3); + + if (log2_dpte_row_height_linear > 7) + log2_dpte_row_height_linear = 7; + + log2_dpte_row_height = log2_dpte_row_height_linear; + // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary. + // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering. + dpte_row_height = 1 << log2_dpte_row_height; + dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1, + dpte_req_width, + 1) + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + // the upper bound of the dpte_row_width without dependency on viewport position follows. + // for tiled mode, row height is the same as req height and row store up to vp size upper bound + if (!surf_vert) { + log2_dpte_row_height = log2_dpte_req_height; + dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) + + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + log2_dpte_row_height = + (log2_blk_width < log2_dpte_req_width) ? + log2_blk_width : log2_dpte_req_width; + dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) + + dpte_req_height; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height; + } + } + if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request + else + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request + + rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; + + // the dpte_group_bytes is reduced for the specific case of vertical + // access of a tile surface that has dpte request of 8x1 ptes. + if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + rq_sizing_param->dpte_group_bytes = 512; + else + //full size + rq_sizing_param->dpte_group_bytes = 2048; + + //since pte request size is 64byte, the number of data pte requests per full sized group is as follows. + log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes); + log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests + + // full sized data pte group width in elements + if (!surf_vert) + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width; + else + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height; + + //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B + if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB + log2_dpte_group_width = log2_dpte_group_width - 1; + + dpte_group_width = 1 << log2_dpte_group_width; + + // since dpte groups are only aligned to dpte_req_width and not dpte_group_width, + // the upper bound for the dpte groups per row is as follows. + rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width, + 1); +} + +static void get_surf_rq_param(struct display_mode_lib *mode_lib, + display_data_rq_sizing_params_st *rq_sizing_param, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + const display_pipe_source_params_st pipe_src_param, + bool is_chroma) +{ + bool mode_422 = 0; + unsigned int vp_width = 0; + unsigned int vp_height = 0; + unsigned int data_pitch = 0; + unsigned int meta_pitch = 0; + unsigned int ppe = mode_422 ? 2 : 1; + + // FIXME check if ppe apply for both luma and chroma in 422 case + if (is_chroma) { + vp_width = pipe_src_param.viewport_width_c / ppe; + vp_height = pipe_src_param.viewport_height_c; + data_pitch = pipe_src_param.data_pitch_c; + meta_pitch = pipe_src_param.meta_pitch_c; + } else { + vp_width = pipe_src_param.viewport_width / ppe; + vp_height = pipe_src_param.viewport_height; + data_pitch = pipe_src_param.data_pitch; + meta_pitch = pipe_src_param.meta_pitch; + } + + rq_sizing_param->chunk_bytes = 8192; + + if (rq_sizing_param->chunk_bytes == 64 * 1024) + rq_sizing_param->min_chunk_bytes = 0; + else + rq_sizing_param->min_chunk_bytes = 1024; + + rq_sizing_param->meta_chunk_bytes = 2048; + rq_sizing_param->min_meta_chunk_bytes = 256; + + rq_sizing_param->mpte_group_bytes = 2048; + + get_meta_and_pte_attr(mode_lib, + rq_dlg_param, + rq_misc_param, + rq_sizing_param, + vp_width, + vp_height, + data_pitch, + meta_pitch, + pipe_src_param.source_format, + pipe_src_param.sw_mode, + pipe_src_param.macro_tile_size, + pipe_src_param.source_scan, + is_chroma); +} + +static void dml20v2_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, + display_rq_params_st *rq_param, + const display_pipe_source_params_st pipe_src_param) +{ + // get param for luma surface + rq_param->yuv420 = pipe_src_param.source_format == dm_420_8 + || pipe_src_param.source_format == dm_420_10; + rq_param->yuv420_10bpc = pipe_src_param.source_format == dm_420_10; + + get_surf_rq_param(mode_lib, + &(rq_param->sizing.rq_l), + &(rq_param->dlg.rq_l), + &(rq_param->misc.rq_l), + pipe_src_param, + 0); + + if (is_dual_plane((enum source_format_class)(pipe_src_param.source_format))) { + // get param for chroma surface + get_surf_rq_param(mode_lib, + &(rq_param->sizing.rq_c), + &(rq_param->dlg.rq_c), + &(rq_param->misc.rq_c), + pipe_src_param, + 1); + } + + // calculate how to split the det buffer space between luma and chroma + handle_det_buf_split(mode_lib, rq_param, pipe_src_param); + print__rq_params_st(mode_lib, *rq_param); +} + +void dml20v2_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_pipe_params_st pipe_param) +{ + display_rq_params_st rq_param = {0}; + + memset(rq_regs, 0, sizeof(*rq_regs)); + dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param.src); + extract_rq_regs(mode_lib, rq_regs, rq_param); + + print__rq_regs_st(mode_lib, *rq_regs); +} + +// Note: currently taken in as is. +// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma. +static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + display_dlg_regs_st *disp_dlg_regs, + display_ttu_regs_st *disp_ttu_regs, + const display_rq_dlg_params_st rq_dlg_param, + const display_dlg_sys_params_st dlg_sys_param, + const bool cstate_en, + const bool pstate_en) +{ + const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; + const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; + const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; + const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; + const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; + const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; + + // ------------------------- + // Section 1.15.2.1: OTG dependent Params + // ------------------------- + // Timing + unsigned int htotal = dst->htotal; +// unsigned int hblank_start = dst.hblank_start; // TODO: Remove + unsigned int hblank_end = dst->hblank_end; + unsigned int vblank_start = dst->vblank_start; + unsigned int vblank_end = dst->vblank_end; + unsigned int min_vblank = mode_lib->ip.min_vblank_lines; + + double dppclk_freq_in_mhz = clks->dppclk_mhz; + double dispclk_freq_in_mhz = clks->dispclk_mhz; + double refclk_freq_in_mhz = clks->refclk_mhz; + double pclk_freq_in_mhz = dst->pixel_rate_mhz; + bool interlaced = dst->interlaced; + + double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; + + double min_dcfclk_mhz; + double t_calc_us; + double min_ttu_vblank; + + double min_dst_y_ttu_vblank; + unsigned int dlg_vblank_start; + bool dual_plane; + bool mode_422; + unsigned int access_dir; + unsigned int vp_height_l; + unsigned int vp_width_l; + unsigned int vp_height_c; + unsigned int vp_width_c; + + // Scaling + unsigned int htaps_l; + unsigned int htaps_c; + double hratio_l; + double hratio_c; + double vratio_l; + double vratio_c; + bool scl_enable; + + double line_time_in_us; + // double vinit_l; + // double vinit_c; + // double vinit_bot_l; + // double vinit_bot_c; + + // unsigned int swath_height_l; + unsigned int swath_width_ub_l; + // unsigned int dpte_bytes_per_row_ub_l; + unsigned int dpte_groups_per_row_ub_l; + // unsigned int meta_pte_bytes_per_frame_ub_l; + // unsigned int meta_bytes_per_row_ub_l; + + // unsigned int swath_height_c; + unsigned int swath_width_ub_c; + // unsigned int dpte_bytes_per_row_ub_c; + unsigned int dpte_groups_per_row_ub_c; + + unsigned int meta_chunks_per_row_ub_l; + unsigned int meta_chunks_per_row_ub_c; + unsigned int vupdate_offset; + unsigned int vupdate_width; + unsigned int vready_offset; + + unsigned int dppclk_delay_subtotal; + unsigned int dispclk_delay_subtotal; + unsigned int pixel_rate_delay_subtotal; + + unsigned int vstartup_start; + unsigned int dst_x_after_scaler; + unsigned int dst_y_after_scaler; + double line_wait; + double dst_y_prefetch; + double dst_y_per_vm_vblank; + double dst_y_per_row_vblank; + double dst_y_per_vm_flip; + double dst_y_per_row_flip; + double min_dst_y_per_vm_vblank; + double min_dst_y_per_row_vblank; + double lsw; + double vratio_pre_l; + double vratio_pre_c; + unsigned int req_per_swath_ub_l; + unsigned int req_per_swath_ub_c; + unsigned int meta_row_height_l; + unsigned int meta_row_height_c; + unsigned int swath_width_pixels_ub_l; + unsigned int swath_width_pixels_ub_c; + unsigned int scaler_rec_in_width_l; + unsigned int scaler_rec_in_width_c; + unsigned int dpte_row_height_l; + unsigned int dpte_row_height_c; + double hscale_pixel_rate_l; + double hscale_pixel_rate_c; + double min_hratio_fact_l; + double min_hratio_fact_c; + double refcyc_per_line_delivery_pre_l; + double refcyc_per_line_delivery_pre_c; + double refcyc_per_line_delivery_l; + double refcyc_per_line_delivery_c; + + double refcyc_per_req_delivery_pre_l; + double refcyc_per_req_delivery_pre_c; + double refcyc_per_req_delivery_l; + double refcyc_per_req_delivery_c; + + unsigned int full_recout_width; + double xfc_transfer_delay; + double xfc_precharge_delay; + double xfc_remote_surface_flip_latency; + double xfc_dst_y_delta_drq_limit; + double xfc_prefetch_margin; + double refcyc_per_req_delivery_pre_cur0; + double refcyc_per_req_delivery_cur0; + double refcyc_per_req_delivery_pre_cur1; + double refcyc_per_req_delivery_cur1; + + memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); + memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); + + dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en); + dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en); + + dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz); + dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz); + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); + ASSERT(ref_freq_to_pix_freq < 4.0); + + disp_dlg_regs->ref_freq_to_pix_freq = + (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); + disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal + * dml_pow(2, 8)); + disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits + disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end + * (double) ref_freq_to_pix_freq); + ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int) dml_pow(2, 13)); + + min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz; + t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes); + min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal; + dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; + + disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start + + min_dst_y_ttu_vblank) * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int) dml_pow(2, 18)); + + dml_print("DML_DLG: %s: min_dcfclk_mhz = %3.2f\n", + __func__, + min_dcfclk_mhz); + dml_print("DML_DLG: %s: min_ttu_vblank = %3.2f\n", + __func__, + min_ttu_vblank); + dml_print("DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n", + __func__, + min_dst_y_ttu_vblank); + dml_print("DML_DLG: %s: t_calc_us = %3.2f\n", + __func__, + t_calc_us); + dml_print("DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n", + __func__, + disp_dlg_regs->min_dst_y_next_start); + dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", + __func__, + ref_freq_to_pix_freq); + + // ------------------------- + // Section 1.15.2.2: Prefetch, Active and TTU + // ------------------------- + // Prefetch Calc + // Source +// dcc_en = src.dcc; + dual_plane = is_dual_plane((enum source_format_class)(src->source_format)); + mode_422 = 0; // FIXME + access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed +// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0); +// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1); + vp_height_l = src->viewport_height; + vp_width_l = src->viewport_width; + vp_height_c = src->viewport_height_c; + vp_width_c = src->viewport_width_c; + + // Scaling + htaps_l = taps->htaps; + htaps_c = taps->htaps_c; + hratio_l = scl->hscl_ratio; + hratio_c = scl->hscl_ratio_c; + vratio_l = scl->vscl_ratio; + vratio_c = scl->vscl_ratio_c; + scl_enable = scl->scl_enable; + + line_time_in_us = (htotal / pclk_freq_in_mhz); +// vinit_l = scl.vinit; +// vinit_c = scl.vinit_c; +// vinit_bot_l = scl.vinit_bot; +// vinit_bot_c = scl.vinit_bot_c; + +// unsigned int swath_height_l = rq_dlg_param.rq_l.swath_height; + swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub; +// unsigned int dpte_bytes_per_row_ub_l = rq_dlg_param.rq_l.dpte_bytes_per_row_ub; + dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub; +// unsigned int meta_pte_bytes_per_frame_ub_l = rq_dlg_param.rq_l.meta_pte_bytes_per_frame_ub; +// unsigned int meta_bytes_per_row_ub_l = rq_dlg_param.rq_l.meta_bytes_per_row_ub; + +// unsigned int swath_height_c = rq_dlg_param.rq_c.swath_height; + swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub; + // dpte_bytes_per_row_ub_c = rq_dlg_param.rq_c.dpte_bytes_per_row_ub; + dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub; + + meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub; + meta_chunks_per_row_ub_c = rq_dlg_param.rq_c.meta_chunks_per_row_ub; + vupdate_offset = dst->vupdate_offset; + vupdate_width = dst->vupdate_width; + vready_offset = dst->vready_offset; + + dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; + dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; + + if (scl_enable) + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; + else + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; + + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; + + if (dout->dsc_enable) { + double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dispclk_delay_subtotal += dsc_delay; + } + + pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz + + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz; + + vstartup_start = dst->vstartup_start; + if (interlaced) { + if (vstartup_start / 2.0 + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal + <= vblank_end / 2.0) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } else { + if (vstartup_start + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal + <= vblank_end) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } + + // TODO: Where is this coming from? + if (interlaced) + vstartup_start = vstartup_start / 2; + + // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp? + if (vstartup_start >= min_vblank) { + dml_print("WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n", + __func__, + vblank_start, + vblank_end); + dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", + __func__, + vstartup_start, + min_vblank); + min_vblank = vstartup_start + 1; + dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", + __func__, + vstartup_start, + min_vblank); + } + + dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); + dml_print("DML_DLG: %s: pixel_rate_delay_subtotal = %d\n", + __func__, + pixel_rate_delay_subtotal); + dml_print("DML_DLG: %s: dst_x_after_scaler = %d\n", + __func__, + dst_x_after_scaler); + dml_print("DML_DLG: %s: dst_y_after_scaler = %d\n", + __func__, + dst_y_after_scaler); + + // Lwait + line_wait = mode_lib->soc.urgent_latency_us; + if (cstate_en) + line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait); + if (pstate_en) + line_wait = dml_max(mode_lib->soc.dram_clock_change_latency_us + + mode_lib->soc.urgent_latency_us, + line_wait); + line_wait = line_wait / line_time_in_us; + + dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); + + dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + min_dst_y_per_vm_vblank = 8.0; + min_dst_y_per_row_vblank = 16.0; + + // magic! + if (htotal <= 75) { + min_vblank = 300; + min_dst_y_per_vm_vblank = 100.0; + min_dst_y_per_row_vblank = 100.0; + } + + dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); + + ASSERT(dst_y_per_vm_vblank < min_dst_y_per_vm_vblank); + ASSERT(dst_y_per_row_vblank < min_dst_y_per_row_vblank); + + ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); + lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank); + + dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw); + + vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l); + dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c); + + // Active + req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub; + req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub; + meta_row_height_l = rq_dlg_param.rq_l.meta_row_height; + meta_row_height_c = rq_dlg_param.rq_c.meta_row_height; + swath_width_pixels_ub_l = 0; + swath_width_pixels_ub_c = 0; + scaler_rec_in_width_l = 0; + scaler_rec_in_width_c = 0; + dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height; + dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height; + + if (mode_422) { + swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element + swath_width_pixels_ub_c = swath_width_ub_c * 2; + } else { + swath_width_pixels_ub_l = swath_width_ub_l * 1; + swath_width_pixels_ub_c = swath_width_ub_c * 1; + } + + hscale_pixel_rate_l = 0.; + hscale_pixel_rate_c = 0.; + min_hratio_fact_l = 1.0; + min_hratio_fact_c = 1.0; + + if (htaps_l <= 1) + min_hratio_fact_l = 2.0; + else if (htaps_l <= 6) { + if ((hratio_l * 2.0) > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l * 2.0; + } else { + if (hratio_l > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l; + } + + hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; + + if (htaps_c <= 1) + min_hratio_fact_c = 2.0; + else if (htaps_c <= 6) { + if ((hratio_c * 2.0) > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c * 2.0; + } else { + if (hratio_c > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c; + } + + hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; + + refcyc_per_line_delivery_pre_l = 0.; + refcyc_per_line_delivery_pre_c = 0.; + refcyc_per_line_delivery_l = 0.; + refcyc_per_line_delivery_c = 0.; + + refcyc_per_req_delivery_pre_l = 0.; + refcyc_per_req_delivery_pre_c = 0.; + refcyc_per_req_delivery_l = 0.; + refcyc_per_req_delivery_c = 0.; + + full_recout_width = 0; + // In ODM + if (src->is_hsplit) { + // This "hack" is only allowed (and valid) for MPC combine. In ODM + // combine, you MUST specify the full_recout_width...according to Oswin + if (dst->full_recout_width == 0 && !dst->odm_combine) { + dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", + __func__); + full_recout_width = dst->recout_width * 2; // assume half split for dcn1 + } else + full_recout_width = dst->full_recout_width; + } else + full_recout_width = dst->recout_width; + + // As of DCN2, mpc_combine and odm_combine are mutually exclusive + refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + refcyc_per_line_delivery_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + dml_print("DML_DLG: %s: full_recout_width = %d\n", + __func__, + full_recout_width); + dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", + __func__, + hscale_pixel_rate_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", + __func__, + refcyc_per_line_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", + __func__, + refcyc_per_line_delivery_l); + + if (dual_plane) { + refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + refcyc_per_line_delivery_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", + __func__, + refcyc_per_line_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", + __func__, + refcyc_per_line_delivery_c); + } + + // TTU - Luma / Chroma + if (access_dir) { // vertical access + scaler_rec_in_width_l = vp_height_l; + scaler_rec_in_width_c = vp_height_c; + } else { + scaler_rec_in_width_l = vp_width_l; + scaler_rec_in_width_c = vp_width_c; + } + + refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + refcyc_per_req_delivery_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", + __func__, + refcyc_per_req_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", + __func__, + refcyc_per_req_delivery_l); + + ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); + + if (dual_plane) { + refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + refcyc_per_req_delivery_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", + __func__, + refcyc_per_req_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", + __func__, + refcyc_per_req_delivery_c); + + ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); + } + + // XFC + xfc_transfer_delay = get_xfc_transfer_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + xfc_precharge_delay = get_xfc_precharge_delay(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + xfc_remote_surface_flip_latency = get_xfc_remote_surface_flip_latency(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + xfc_dst_y_delta_drq_limit = xfc_remote_surface_flip_latency; + xfc_prefetch_margin = get_xfc_prefetch_margin(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + + // TTU - Cursor + refcyc_per_req_delivery_pre_cur0 = 0.0; + refcyc_per_req_delivery_cur0 = 0.0; + if (src->num_cursors > 0) { + calculate_ttu_cursor(mode_lib, + &refcyc_per_req_delivery_pre_cur0, + &refcyc_per_req_delivery_cur0, + refclk_freq_in_mhz, + ref_freq_to_pix_freq, + hscale_pixel_rate_l, + scl->hscl_ratio, + vratio_pre_l, + vratio_l, + src->cur0_src_width, + (enum cursor_bpp)(src->cur0_bpp)); + } + + refcyc_per_req_delivery_pre_cur1 = 0.0; + refcyc_per_req_delivery_cur1 = 0.0; + if (src->num_cursors > 1) { + calculate_ttu_cursor(mode_lib, + &refcyc_per_req_delivery_pre_cur1, + &refcyc_per_req_delivery_cur1, + refclk_freq_in_mhz, + ref_freq_to_pix_freq, + hscale_pixel_rate_l, + scl->hscl_ratio, + vratio_pre_l, + vratio_l, + src->cur1_src_width, + (enum cursor_bpp)(src->cur1_bpp)); + } + + // TTU - Misc + // all hard-coded + + // Assignment to register structures + disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line + disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk + ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int) dml_pow(2, 13)); + disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); + + disp_dlg_regs->refcyc_per_pte_group_vblank_l = + (unsigned int) (dst_y_per_row_vblank * (double) htotal + * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int) dml_pow(2, 13)); + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank + * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_c); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c + < (unsigned int) dml_pow(2, 13)); + } + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = + (unsigned int) (dst_y_per_row_vblank * (double) htotal + * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now + + disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal + * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l; + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal + * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip + * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c; + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip + * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c; + } + + disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l + / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int) dml_pow(2, 17)); + + if (dual_plane) { + disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c + / (double) vratio_c * dml_pow(2, 2)); + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { + dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n", + __func__, + disp_dlg_regs->dst_y_per_pte_row_nom_c, + (unsigned int) dml_pow(2, 17) - 1); + } + } + + disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l + / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int) dml_pow(2, 17)); + + disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now + + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_nom_c = + (unsigned int) ((double) dpte_row_height_c / (double) vratio_c + * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; + + // TODO: Is this the right calculation? Does htotal need to be halved? + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = + (unsigned int) ((double) meta_row_height_c / (double) vratio_c + * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; + } + + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, + 1); + disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, + 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int) dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, + 1); + disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, + 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int) dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; + disp_dlg_regs->dst_y_offset_cur0 = 0; + disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; + disp_dlg_regs->dst_y_offset_cur1 = 0; + + disp_dlg_regs->xfc_reg_transfer_delay = xfc_transfer_delay; + disp_dlg_regs->xfc_reg_precharge_delay = xfc_precharge_delay; + disp_dlg_regs->xfc_reg_remote_surface_flip_latency = xfc_remote_surface_flip_latency; + disp_dlg_regs->xfc_reg_prefetch_margin = dml_ceil(xfc_prefetch_margin * refclk_freq_in_mhz, + 1); + + // slave has to have this value also set to off + if (src->xfc_enable && !src->xfc_slave) + disp_dlg_regs->dst_y_delta_drq_limit = dml_ceil(xfc_dst_y_delta_drq_limit, 1); + else + disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = + (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = + (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 + * dml_pow(2, 10)); + disp_ttu_regs->qos_level_low_wm = 0; + ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14)); + disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal + * ref_freq_to_pix_freq); + /*ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));*/ + + disp_ttu_regs->qos_level_flip = 14; + disp_ttu_regs->qos_level_fixed_l = 8; + disp_ttu_regs->qos_level_fixed_c = 8; + disp_ttu_regs->qos_level_fixed_cur0 = 8; + disp_ttu_regs->qos_ramp_disable_l = 0; + disp_ttu_regs->qos_ramp_disable_c = 0; + disp_ttu_regs->qos_ramp_disable_cur0 = 0; + + disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; + ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24)); + + print__ttu_regs_st(mode_lib, *disp_ttu_regs); + print__dlg_regs_st(mode_lib, *disp_dlg_regs); +} + +void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support) +{ + display_rq_params_st rq_param = {0}; + display_dlg_sys_params_st dlg_sys_param = {0}; + + // Get watermark and Tex. + dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency + / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated + + print__dlg_sys_params_st(mode_lib, dlg_sys_param); + + // system parameter calculation done + + dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx); + dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe.src); + dml20v2_rq_dlg_get_dlg_params(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx, + dlg_regs, + ttu_regs, + rq_param.dlg, + dlg_sys_param, + cstate_en, + pstate_en); + dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); +} + +static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, + double *refcyc_per_req_delivery_pre_cur, + double *refcyc_per_req_delivery_cur, + double refclk_freq_in_mhz, + double ref_freq_to_pix_freq, + double hscale_pixel_rate_l, + double hscl_ratio, + double vratio_pre_l, + double vratio_l, + unsigned int cur_width, + enum cursor_bpp cur_bpp) +{ + unsigned int cur_src_width = cur_width; + unsigned int cur_req_size = 0; + unsigned int cur_req_width = 0; + double cur_width_ub = 0.0; + double cur_req_per_width = 0.0; + double hactive_cur = 0.0; + + ASSERT(cur_src_width <= 256); + + *refcyc_per_req_delivery_pre_cur = 0.0; + *refcyc_per_req_delivery_cur = 0.0; + if (cur_src_width > 0) { + unsigned int cur_bit_per_pixel = 0; + + if (cur_bpp == dm_cur_2bit) { + cur_req_size = 64; // byte + cur_bit_per_pixel = 2; + } else { // 32bit + cur_bit_per_pixel = 32; + if (cur_src_width >= 1 && cur_src_width <= 16) + cur_req_size = 64; + else if (cur_src_width >= 17 && cur_src_width <= 31) + cur_req_size = 128; + else + cur_req_size = 256; + } + + cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0); + cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) + * (double) cur_req_width; + cur_req_per_width = cur_width_ub / (double) cur_req_width; + hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor + + if (vratio_pre_l <= 1.0) { + *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq + / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz + * (double) cur_src_width / hscale_pixel_rate_l + / (double) cur_req_per_width; + } + + ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13)); + + if (vratio_l <= 1.0) { + *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq + / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz + * (double) cur_src_width / hscale_pixel_rate_l + / (double) cur_req_per_width; + } + + dml_print("DML_DLG: %s: cur_req_width = %d\n", + __func__, + cur_req_width); + dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n", + __func__, + cur_width_ub); + dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n", + __func__, + cur_req_per_width); + dml_print("DML_DLG: %s: hactive_cur = %3.2f\n", + __func__, + hactive_cur); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", + __func__, + *refcyc_per_req_delivery_pre_cur); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", + __func__, + *refcyc_per_req_delivery_cur); + + ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13)); + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h new file mode 100644 index 000000000000..0378406bf7e7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h @@ -0,0 +1,74 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML20V2_DISPLAY_RQ_DLG_CALC_H__ +#define __DML20V2_DISPLAY_RQ_DLG_CALC_H__ + +#include "../dml_common_defs.h" +#include "../display_rq_dlg_helpers.h" + +struct display_mode_lib; + + +// Function: dml_rq_dlg_get_rq_reg +// Main entry point for test to get the register values out of this DML class. +// This function calls <get_rq_param> and <extract_rq_regs> fucntions to calculate +// and then populate the rq_regs struct +// Input: +// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) +// Output: +// rq_regs - struct that holds all the RQ registers field value. +// See also: <display_rq_regs_st> +void dml20v2_rq_dlg_get_rq_reg( + struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_pipe_params_st pipe_param); + + +// Function: dml_rq_dlg_get_dlg_reg +// Calculate and return DLG and TTU register struct given the system setting +// Output: +// dlg_regs - output DLG register struct +// ttu_regs - output DLG TTU register struct +// Input: +// e2e_pipe_param - "compacted" array of e2e pipe param struct +// num_pipes - num of active "pipe" or "route" +// pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg +// cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered. +// Added for legacy or unrealistic timing tests. +void dml20v2_rq_dlg_get_dlg_reg( + struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c new file mode 100644 index 000000000000..456cd0e3289c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -0,0 +1,6123 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifdef CONFIG_DRM_AMD_DC_DCN2_0 + +#include "../display_mode_lib.h" +#include "../dml_inline_defs.h" +#include "../display_mode_vba.h" +#include "display_mode_vba_21.h" + + +/* + * NOTE: + * This file is gcc-parsable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +typedef unsigned int uint; + +typedef struct { + double DPPCLK; + double DISPCLK; + double PixelClock; + double DCFCLKDeepSleep; + unsigned int DPPPerPlane; + bool ScalerEnabled; + enum scan_direction_class SourceScan; + unsigned int BlockWidth256BytesY; + unsigned int BlockHeight256BytesY; + unsigned int BlockWidth256BytesC; + unsigned int BlockHeight256BytesC; + unsigned int InterlaceEnable; + unsigned int NumberOfCursors; + unsigned int VBlank; + unsigned int HTotal; +} Pipe; + +typedef struct { + bool Enable; + unsigned int MaxPageTableLevels; + unsigned int CachedPageTableLevels; +} HostVM; + +#define BPP_INVALID 0 +#define BPP_BLENDED_PIPE 0xffffffff + +static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); +static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib); +static unsigned int dscceComputeDelay( + unsigned int bpc, + double bpp, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat); +static unsigned int dscComputeDelay(enum output_format_class pixelFormat); +// Super monster function with some 45 argument +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + Pipe *myPipe, + unsigned int DSCDelay, + double DPPCLKDelaySubtotal, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCFormater, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int ScalerRecoutWidth, + enum output_format_class OutputFormat, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int MaxVStartup, + unsigned int GPUVMPageTableLevels, + bool GPUVMEnable, + HostVM *myHostVM, + bool DynamicMetadataEnable, + int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + bool DCCEnable, + double UrgentLatency, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double BytePerPixelDETY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + double BytePerPixelDETC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + bool XFCEnabled, + double XFCRemoteSurfaceFlipDelay, + bool ProgressiveToInterlaceUnitInOPP, + double *DSTXAfterScaler, + double *DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBWLuma, + double *RequiredPrefetchPixDataBWChroma, + unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, + double *Tno_bw, + double *prefetch_vmrow_bw, + unsigned int *swath_width_luma_ub, + unsigned int *swath_width_chroma_ub, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix); +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); +static double CalculateDCCConfiguration( + bool DCCEnabled, + bool DCCProgrammingAssumesScanDirectionUnknown, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + double DETBufferSize, + unsigned int RequestHeight256Byte, + unsigned int SwathHeight, + enum dm_swizzle_mode TilingFormat, + unsigned int BytePerPixel, + enum scan_direction_class ScanOrientation, + unsigned int *MaxUncompressedBlock, + unsigned int *MaxCompressedBlock, + unsigned int *Independent64ByteBlock); +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath); +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + unsigned int SwathWidthY, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMaxPageTableLevels, + unsigned int HostVMCachedPageTableLevels, + unsigned int VMMPageSize, + unsigned int PTEBufferSizeInRequests, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + unsigned int *dpte_row_width_ub, + unsigned int *dpte_row_height, + unsigned int *MetaRequestWidth, + unsigned int *MetaRequestHeight, + unsigned int *meta_row_width, + unsigned int *meta_row_height, + unsigned int *vm_group_bytes, + long *dpte_group_bytes, + unsigned int *PixelPTEReqWidth, + unsigned int *PixelPTEReqHeight, + unsigned int *PTERequestSize, + unsigned int *DPDE0BytesFrame, + unsigned int *MetaPTEBytesFrame); + +static double CalculateTWait( + unsigned int PrefetchMode, + double DRAMClockChangeLatency, + double UrgentLatency, + double SREnterPlusExitTime); +static double CalculateRemoteSurfaceFlipDelay( + struct display_mode_lib *mode_lib, + double VRatio, + double SwathWidth, + double Bpp, + double LineTime, + double XFCTSlvVupdateOffset, + double XFCTSlvVupdateWidth, + double XFCTSlvVreadyOffset, + double XFCXBUFLatencyTolerance, + double XFCFillBWOverhead, + double XFCSlvChunkSize, + double XFCBusTransportTime, + double TCalc, + double TWait, + double *SrcActiveDrainRate, + double *TInitXFill, + double *TslvChk); +static void CalculateActiveRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw); +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + double UrgentExtraLatency, + double UrgentLatency, + unsigned int GPUVMMaxPageTableLevels, + bool HostVMEnable, + unsigned int HostVMMaxPageTableLevels, + unsigned int HostVMCachedPageTableLevels, + bool GPUVMEnable, + double PDEAndMetaPTEBytesPerFrame, + double MetaRowBytes, + double DPTEBytesPerRow, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + double LineTime, + double VRatio, + double Tno_bw, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + unsigned int dpte_row_height_chroma, + unsigned int meta_row_height_chroma, + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe); +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackLumaHTaps, + unsigned int WritebackLumaVTaps, + unsigned int WritebackChromaHTaps, + unsigned int WritebackChromaVTaps, + unsigned int WritebackDestinationWidth); +static void CalculateWatermarksAndDRAMSpeedChangeSupport( + struct display_mode_lib *mode_lib, + unsigned int PrefetchMode, + unsigned int NumberOfActivePlanes, + unsigned int MaxLineBufferLines, + unsigned int LineBufferSize, + unsigned int DPPOutputBufferPixels, + double DETBufferSizeInKByte, + unsigned int WritebackInterfaceLumaBufferSize, + unsigned int WritebackInterfaceChromaBufferSize, + double DCFCLK, + double UrgentOutOfOrderReturn, + double ReturnBW, + bool GPUVMEnable, + long dpte_group_bytes[], + unsigned int MetaChunkSize, + double UrgentLatency, + double ExtraLatency, + double WritebackLatency, + double WritebackChunkSize, + double SOCCLK, + double DRAMClockChangeLatency, + double SRExitTime, + double SREnterPlusExitTime, + double DCFCLKDeepSleep, + int DPPPerPlane[], + bool DCCEnable[], + double DPPCLK[], + unsigned int SwathWidthSingleDPPY[], + unsigned int SwathHeightY[], + double ReadBandwidthPlaneLuma[], + unsigned int SwathHeightC[], + double ReadBandwidthPlaneChroma[], + unsigned int LBBitPerPixel[], + unsigned int SwathWidthY[], + double HRatio[], + unsigned int vtaps[], + unsigned int VTAPsChroma[], + double VRatio[], + unsigned int HTotal[], + double PixelClock[], + unsigned int BlendingAndTiming[], + double BytePerPixelDETY[], + double BytePerPixelDETC[], + bool WritebackEnable[], + enum source_format_class WritebackPixelFormat[], + double WritebackDestinationWidth[], + double WritebackDestinationHeight[], + double WritebackSourceHeight[], + enum clock_change_support *DRAMClockChangeSupport, + double *UrgentWatermark, + double *WritebackUrgentWatermark, + double *DRAMClockChangeWatermark, + double *WritebackDRAMClockChangeWatermark, + double *StutterExitWatermark, + double *StutterEnterPlusExitWatermark, + double *MinActiveDRAMClockChangeLatencySupported); +static void CalculateDCFCLKDeepSleep( + struct display_mode_lib *mode_lib, + unsigned int NumberOfActivePlanes, + double BytePerPixelDETY[], + double BytePerPixelDETC[], + double VRatio[], + unsigned int SwathWidthY[], + int DPPPerPlane[], + double HRatio[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double DPPCLK[], + double *DCFCLKDeepSleep); +static void CalculateDETBufferSize( + double DETBufferSizeInKByte, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double *DETBufferSizeY, + double *DETBufferSizeC); +static void CalculateUrgentBurstFactor( + unsigned int DETBufferSizeInKByte, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + unsigned int SwathWidthY, + double LineTime, + double UrgentLatency, + double CursorBufferSize, + unsigned int CursorWidth, + unsigned int CursorBPP, + double VRatio, + double VRatioPreY, + double VRatioPreC, + double BytePerPixelInDETY, + double BytePerPixelInDETC, + double *UrgentBurstFactorCursor, + double *UrgentBurstFactorCursorPre, + double *UrgentBurstFactorLuma, + double *UrgentBurstFactorLumaPre, + double *UrgentBurstFactorChroma, + double *UrgentBurstFactorChromaPre, + unsigned int *NotEnoughUrgentLatencyHiding, + unsigned int *NotEnoughUrgentLatencyHidingPre); + +static void CalculatePixelDeliveryTimes( + unsigned int NumberOfActivePlanes, + double VRatio[], + double VRatioPrefetchY[], + double VRatioPrefetchC[], + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + int DPPPerPlane[], + double HRatio[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double DPPCLK[], + double BytePerPixelDETC[], + enum scan_direction_class SourceScan[], + unsigned int BlockWidth256BytesY[], + unsigned int BlockHeight256BytesY[], + unsigned int BlockWidth256BytesC[], + unsigned int BlockHeight256BytesC[], + double DisplayPipeLineDeliveryTimeLuma[], + double DisplayPipeLineDeliveryTimeChroma[], + double DisplayPipeLineDeliveryTimeLumaPrefetch[], + double DisplayPipeLineDeliveryTimeChromaPrefetch[], + double DisplayPipeRequestDeliveryTimeLuma[], + double DisplayPipeRequestDeliveryTimeChroma[], + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], + double DisplayPipeRequestDeliveryTimeChromaPrefetch[]); + +static void CalculateMetaAndPTETimes( + unsigned int NumberOfActivePlanes, + bool GPUVMEnable, + unsigned int MetaChunkSize, + unsigned int MinMetaChunkSizeBytes, + unsigned int GPUVMMaxPageTableLevels, + unsigned int HTotal[], + double VRatio[], + double VRatioPrefetchY[], + double VRatioPrefetchC[], + double DestinationLinesToRequestRowInVBlank[], + double DestinationLinesToRequestRowInImmediateFlip[], + double DestinationLinesToRequestVMInVBlank[], + double DestinationLinesToRequestVMInImmediateFlip[], + bool DCCEnable[], + double PixelClock[], + double BytePerPixelDETY[], + double BytePerPixelDETC[], + enum scan_direction_class SourceScan[], + unsigned int dpte_row_height[], + unsigned int dpte_row_height_chroma[], + unsigned int meta_row_width[], + unsigned int meta_row_height[], + unsigned int meta_req_width[], + unsigned int meta_req_height[], + long dpte_group_bytes[], + unsigned int PTERequestSizeY[], + unsigned int PTERequestSizeC[], + unsigned int PixelPTEReqWidthY[], + unsigned int PixelPTEReqHeightY[], + unsigned int PixelPTEReqWidthC[], + unsigned int PixelPTEReqHeightC[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + unsigned int vm_group_bytes[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + unsigned int meta_pte_bytes_per_frame_ub_l[], + unsigned int meta_pte_bytes_per_frame_ub_c[], + double DST_Y_PER_PTE_ROW_NOM_L[], + double DST_Y_PER_PTE_ROW_NOM_C[], + double DST_Y_PER_META_ROW_NOM_L[], + double TimePerMetaChunkNominal[], + double TimePerMetaChunkVBlank[], + double TimePerMetaChunkFlip[], + double time_per_pte_group_nom_luma[], + double time_per_pte_group_vblank_luma[], + double time_per_pte_group_flip_luma[], + double time_per_pte_group_nom_chroma[], + double time_per_pte_group_vblank_chroma[], + double time_per_pte_group_flip_chroma[], + double TimePerVMGroupVBlank[], + double TimePerVMGroupFlip[], + double TimePerVMRequestVBlank[], + double TimePerVMRequestFlip[]); + +static double CalculateExtraLatency( + double UrgentRoundTripAndOutOfOrderLatency, + int TotalNumberOfActiveDPP, + int PixelChunkSizeInKByte, + int TotalNumberOfDCCActiveDPP, + int MetaChunkSize, + double ReturnBW, + bool GPUVMEnable, + bool HostVMEnable, + int NumberOfActivePlanes, + int NumberOfDPP[], + long dpte_group_bytes[], + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + int HostVMMaxPageTableLevels, + int HostVMCachedPageTableLevels); + +void dml21_recalculate(struct display_mode_lib *mode_lib) +{ + ModeSupportAndSystemConfiguration(mode_lib); + PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); + DisplayPipeConfiguration(mode_lib); + DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); +} + +static unsigned int dscceComputeDelay( + unsigned int bpc, + double bpp, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat) +{ + // valid bpc = source bits per component in the set of {8, 10, 12} + // valid bpp = increments of 1/16 of a bit + // min = 6/7/8 in N420/N422/444, respectively + // max = such that compression is 1:1 + //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) + //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} + + // fixed value + unsigned int rcModelSize = 8192; + + // N422/N420 operate at 2 pixels per clock + unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, S, ix, wx, p, l0, a, ax, l, + Delay, pixels; + + if (pixelFormat == dm_n422 || pixelFormat == dm_420) + pixelsPerClock = 2; + // #all other modes operate at 1 pixel per clock + else + pixelsPerClock = 1; + + //initial transmit delay as per PPS + initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock); + + //compute ssm delay + if (bpc == 8) + D = 81; + else if (bpc == 10) + D = 89; + else + D = 113; + + //divide by pixel per cycle to compute slice width as seen by DSC + w = sliceWidth / pixelsPerClock; + + //422 mode has an additional cycle of delay + if (pixelFormat == dm_s422) + S = 1; + else + S = 0; + + //main calculation for the dscce + ix = initalXmitDelay + 45; + wx = (w + 2) / 3; + p = 3 * wx - w; + l0 = ix / w; + a = ix + p * l0; + ax = (a + 2) / 3 + D + 6 + 1; + l = (ax + wx - 1) / wx; + if ((ix % w) == 0 && p != 0) + lstall = 1; + else + lstall = 0; + Delay = l * wx * (numSlices - 1) + ax + S + lstall + 22; + + //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels + pixels = Delay * 3 * pixelsPerClock; + return pixels; +} + +static unsigned int dscComputeDelay(enum output_format_class pixelFormat) +{ + unsigned int Delay = 0; + + if (pixelFormat == dm_420) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc gets pixels every other cycle + Delay = Delay + 2; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc gets pixels every other cycle + Delay = Delay + 13; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc gets pixels every other cycle + Delay = Delay + 3; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else if (pixelFormat == dm_n422) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 25; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 10; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc - output serializer + Delay = Delay + 1; + // dscc - cdc uncertainty + Delay = Delay + 2; + // sft + Delay = Delay + 1; + } + + return Delay; +} + +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + Pipe *myPipe, + unsigned int DSCDelay, + double DPPCLKDelaySubtotal, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCFormater, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int ScalerRecoutWidth, + enum output_format_class OutputFormat, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int MaxVStartup, + unsigned int GPUVMPageTableLevels, + bool GPUVMEnable, + HostVM *myHostVM, + bool DynamicMetadataEnable, + int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + bool DCCEnable, + double UrgentLatency, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double BytePerPixelDETY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + double BytePerPixelDETC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + bool XFCEnabled, + double XFCRemoteSurfaceFlipDelay, + bool ProgressiveToInterlaceUnitInOPP, + double *DSTXAfterScaler, + double *DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBWLuma, + double *RequiredPrefetchPixDataBWChroma, + unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, + double *Tno_bw, + double *prefetch_vmrow_bw, + unsigned int *swath_width_luma_ub, + unsigned int *swath_width_chroma_ub, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix) +{ + bool MyError = false; + unsigned int DPPCycles, DISPCLKCycles; + double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime; + double Tdm, LineTime, Tsetup; + double dst_y_prefetch_equ; + double Tsw_oto; + double prefetch_bw_oto; + double Tvm_oto; + double Tr0_oto; + double Tvm_oto_lines; + double Tr0_oto_lines; + double Tsw_oto_lines; + double dst_y_prefetch_oto; + double TimeForFetchingMetaPTE = 0; + double TimeForFetchingRowInVBlank = 0; + double LinesToRequestPrefetchPixelData = 0; + double HostVMInefficiencyFactor; + unsigned int HostVMDynamicLevels; + + if (GPUVMEnable == true && myHostVM->Enable == true) { + HostVMInefficiencyFactor = + PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData + / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; + HostVMDynamicLevels = myHostVM->MaxPageTableLevels + - myHostVM->CachedPageTableLevels; + } else { + HostVMInefficiencyFactor = 1; + HostVMDynamicLevels = 0; + } + + if (myPipe->ScalerEnabled) + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL; + else + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly; + + DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; + + DISPCLKCycles = DISPCLKDelaySubtotal; + + if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) + return true; + + *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; + + if (myPipe->DPPPerPlane > 1) + *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth; + + if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP)) + *DSTYAfterScaler = 1; + else + *DSTYAfterScaler = 0; + + DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * myPipe->HTotal)) + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); + *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); + + *VUpdateOffsetPix = dml_ceil(myPipe->HTotal / 4.0, 1); + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / myPipe->DPPCLK + 3.0 / myPipe->DISPCLK); + *VUpdateWidthPix = (14.0 / myPipe->DCFCLKDeepSleep + 12.0 / myPipe->DPPCLK + TotalRepeaterDelayTime) + * myPipe->PixelClock; + + *VReadyOffsetPix = dml_max( + 150.0 / myPipe->DPPCLK, + TotalRepeaterDelayTime + 20.0 / myPipe->DCFCLKDeepSleep + 10.0 / myPipe->DPPCLK) + * myPipe->PixelClock; + + Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / myPipe->PixelClock; + + LineTime = (double) myPipe->HTotal / myPipe->PixelClock; + + if (DynamicMetadataEnable) { + double Tdmbf, Tdmec, Tdmsks; + + Tdm = dml_max(0.0, UrgentExtraLatency - TCalc); + Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / myPipe->DISPCLK; + Tdmec = LineTime; + if (DynamicMetadataLinesBeforeActiveRequired == -1) + Tdmsks = myPipe->VBlank * LineTime / 2.0; + else + Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime; + if (myPipe->InterlaceEnable && !ProgressiveToInterlaceUnitInOPP) + Tdmsks = Tdmsks / 2; + if (VStartup * LineTime + < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) { + MyError = true; + *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait + + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime; + } else + *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0; + } else + Tdm = 0; + + if (GPUVMEnable) { + if (GPUVMPageTableLevels >= 3) + *Tno_bw = UrgentExtraLatency + UrgentLatency * ((GPUVMPageTableLevels - 2) * (myHostVM->MaxPageTableLevels + 1) - 1); + else + *Tno_bw = 0; + } else if (!DCCEnable) + *Tno_bw = LineTime; + else + *Tno_bw = LineTime / 4; + + dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime + - (Tsetup + Tdm) / LineTime + - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); + + Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; + + if (myPipe->SourceScan == dm_horz) { + *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY; + *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC; + } else { + *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY; + *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC; + } + + prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto; + + + if (GPUVMEnable == true) { + Tvm_oto = dml_max(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, + dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), + LineTime / 4.0)); + } else + Tvm_oto = LineTime / 4.0; + + if ((GPUVMEnable == true || DCCEnable == true)) { + Tr0_oto = dml_max( + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, + dml_max(UrgentLatency * (HostVMDynamicLevels + 1), dml_max(LineTime - Tvm_oto, LineTime / 4))); + } else + Tr0_oto = (LineTime - Tvm_oto) / 2.0; + + Tvm_oto_lines = dml_ceil(4 * Tvm_oto / LineTime, 1) / 4.0; + Tr0_oto_lines = dml_ceil(4 * Tr0_oto / LineTime, 1) / 4.0; + Tsw_oto_lines = dml_ceil(4 * Tsw_oto / LineTime, 1) / 4.0; + dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Tsw_oto_lines + 0.75; + + dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; + + if (dst_y_prefetch_oto < dst_y_prefetch_equ) + *DestinationLinesForPrefetch = dst_y_prefetch_oto; + else + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + + dml_print("DML: VStartup: %d\n", VStartup); + dml_print("DML: TCalc: %f\n", TCalc); + dml_print("DML: TWait: %f\n", TWait); + dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay); + dml_print("DML: LineTime: %f\n", LineTime); + dml_print("DML: Tsetup: %f\n", Tsetup); + dml_print("DML: Tdm: %f\n", Tdm); + dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler); + dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler); + dml_print("DML: HTotal: %d\n", myPipe->HTotal); + + *PrefetchBandwidth = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBWLuma = 0; + if (*DestinationLinesForPrefetch > 1) { + double PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor + + PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) + / (*DestinationLinesForPrefetch * LineTime - *Tno_bw); + + double PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * + HostVMInefficiencyFactor + PrefetchSourceLinesY * + *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + + PrefetchSourceLinesC * *swath_width_chroma_ub * + dml_ceil(BytePerPixelDETC, 2)) / + (*DestinationLinesForPrefetch * LineTime - *Tno_bw - 2 * + UrgentLatency * (1 + HostVMDynamicLevels)); + + double PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow + * HostVMInefficiencyFactor + PrefetchSourceLinesY * + *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + + PrefetchSourceLinesC * *swath_width_chroma_ub * + dml_ceil(BytePerPixelDETC, 2)) / + (*DestinationLinesForPrefetch * LineTime - + UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels + * (HostVMDynamicLevels + 1) - 1)); + + double PrefetchBandwidth4 = (PrefetchSourceLinesY * *swath_width_luma_ub * + dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * + *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / + (*DestinationLinesForPrefetch * LineTime - + UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels + * (HostVMDynamicLevels + 1) - 1) - 2 * UrgentLatency * + (1 + HostVMDynamicLevels)); + + if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw > 0) { + PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / ((*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw); + } + if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= UrgentLatency * (1 + HostVMDynamicLevels)) { + *PrefetchBandwidth = PrefetchBandwidth1; + } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < UrgentLatency * (1 + HostVMDynamicLevels)) { + *PrefetchBandwidth = PrefetchBandwidth2; + } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= UrgentLatency * (1 + HostVMDynamicLevels)) { + *PrefetchBandwidth = PrefetchBandwidth3; + } else { + *PrefetchBandwidth = PrefetchBandwidth4; + } + + if (GPUVMEnable) { + TimeForFetchingMetaPTE = dml_max(*Tno_bw + (double) PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / *PrefetchBandwidth, + dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), LineTime / 4)); + } else { +// 5/30/2018 - This was an optimization requested from Sy but now NumberOfCursors is no longer a factor +// so if this needs to be reinstated, then it should be officially done in the VBA code as well. +// if (mode_lib->NumberOfCursors > 0 || XFCEnabled) + TimeForFetchingMetaPTE = LineTime / 4; +// else +// TimeForFetchingMetaPTE = 0.0; + } + + if ((GPUVMEnable == true || DCCEnable == true)) { + TimeForFetchingRowInVBlank = + dml_max( + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) + / *PrefetchBandwidth, + dml_max( + UrgentLatency * (1 + HostVMDynamicLevels), + dml_max( + (LineTime + - TimeForFetchingMetaPTE) / 2.0, + LineTime + / 4.0))); + } else { +// See note above dated 5/30/2018 +// if (NumberOfCursors > 0 || XFCEnabled) + TimeForFetchingRowInVBlank = (LineTime - TimeForFetchingMetaPTE) / 2.0; +// else // TODO: Did someone else add this?? +// TimeForFetchingRowInVBlank = 0.0; + } + + *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; + + *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; + + LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch +// See note above dated 5/30/2018 +// - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? + - ((GPUVMEnable || DCCEnable) ? + (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : + 0.0); // TODO: Did someone else add this?? + + if (LinesToRequestPrefetchPixelData > 0) { + + *VRatioPrefetchY = (double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData; + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + *VRatioPrefetchY = + dml_max( + (double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData, + (double) MaxNumSwathY + * SwathHeightY + / (LinesToRequestPrefetchPixelData + - (VInitPreFillY + - 3.0) + / 2.0)); + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + } else { + MyError = true; + *VRatioPrefetchY = 0; + } + } + + *VRatioPrefetchC = (double) PrefetchSourceLinesC + / LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + + if ((SwathHeightC > 4)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + *VRatioPrefetchC = + dml_max( + *VRatioPrefetchC, + (double) MaxNumSwathC + * SwathHeightC + / (LinesToRequestPrefetchPixelData + - (VInitPreFillC + - 3.0) + / 2.0)); + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + } else { + MyError = true; + *VRatioPrefetchC = 0; + } + } + + *RequiredPrefetchPixDataBWLuma = myPipe->DPPPerPlane + * (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData + * dml_ceil(BytePerPixelDETY, 1) + * *swath_width_luma_ub / LineTime; + *RequiredPrefetchPixDataBWChroma = myPipe->DPPPerPlane + * (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData + * dml_ceil(BytePerPixelDETC, 2) + * *swath_width_chroma_ub / LineTime; + } else { + MyError = true; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBWLuma = 0; + *RequiredPrefetchPixDataBWChroma = 0; + } + + dml_print("DML: Tvm: %fus\n", TimeForFetchingMetaPTE); + dml_print("DML: Tr0: %fus\n", TimeForFetchingRowInVBlank); + dml_print("DML: Tsw: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime - TimeForFetchingMetaPTE - TimeForFetchingRowInVBlank); + dml_print("DML: Tpre: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime); + dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); + + } else { + MyError = true; + } + + { + double prefetch_vm_bw; + double prefetch_row_bw; + + if (PDEAndMetaPTEBytesFrame == 0) { + prefetch_vm_bw = 0; + } else if (*DestinationLinesToRequestVMInVBlank > 0) { + prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); + } else { + prefetch_vm_bw = 0; + MyError = true; + } + if (MetaRowByte + PixelPTEBytesPerRow == 0) { + prefetch_row_bw = 0; + } else if (*DestinationLinesToRequestRowInVBlank > 0) { + prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); + } else { + prefetch_row_bw = 0; + MyError = true; + } + + *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); + } + + if (MyError) { + *PrefetchBandwidth = 0; + TimeForFetchingMetaPTE = 0; + TimeForFetchingRowInVBlank = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *DestinationLinesForPrefetch = 0; + LinesToRequestPrefetchPixelData = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBWLuma = 0; + *RequiredPrefetchPixDataBWChroma = 0; + } + + return MyError; +} + +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); +} + +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4 / Clock, 1); +} + +static double CalculateDCCConfiguration( + bool DCCEnabled, + bool DCCProgrammingAssumesScanDirectionUnknown, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + double DETBufferSize, + unsigned int RequestHeight256Byte, + unsigned int SwathHeight, + enum dm_swizzle_mode TilingFormat, + unsigned int BytePerPixel, + enum scan_direction_class ScanOrientation, + unsigned int *MaxUncompressedBlock, + unsigned int *MaxCompressedBlock, + unsigned int *Independent64ByteBlock) +{ + double MaximumDCCCompressionSurface = 0.0; + enum { + REQ_256Bytes, + REQ_128BytesNonContiguous, + REQ_128BytesContiguous, + REQ_NA + } Request = REQ_NA; + + if (DCCEnabled == true) { + if (DCCProgrammingAssumesScanDirectionUnknown == true) { + if (DETBufferSize >= RequestHeight256Byte * ViewportWidth * BytePerPixel + && DETBufferSize + >= 256 / RequestHeight256Byte + * ViewportHeight) { + Request = REQ_256Bytes; + } else if ((DETBufferSize + < RequestHeight256Byte * ViewportWidth * BytePerPixel + && (BytePerPixel == 2 || BytePerPixel == 4)) + || (DETBufferSize + < 256 / RequestHeight256Byte + * ViewportHeight + && BytePerPixel == 8 + && (TilingFormat == dm_sw_4kb_d + || TilingFormat + == dm_sw_4kb_d_x + || TilingFormat + == dm_sw_var_d + || TilingFormat + == dm_sw_var_d_x + || TilingFormat + == dm_sw_64kb_d + || TilingFormat + == dm_sw_64kb_d_x + || TilingFormat + == dm_sw_64kb_d_t + || TilingFormat + == dm_sw_64kb_r_x))) { + Request = REQ_128BytesNonContiguous; + } else { + Request = REQ_128BytesContiguous; + } + } else { + if (BytePerPixel == 1) { + if (ScanOrientation == dm_vert || SwathHeight == 16) { + Request = REQ_256Bytes; + } else { + Request = REQ_128BytesContiguous; + } + } else if (BytePerPixel == 2) { + if ((ScanOrientation == dm_vert && SwathHeight == 16) || (ScanOrientation != dm_vert && SwathHeight == 8)) { + Request = REQ_256Bytes; + } else if (ScanOrientation == dm_vert) { + Request = REQ_128BytesContiguous; + } else { + Request = REQ_128BytesNonContiguous; + } + } else if (BytePerPixel == 4) { + if (SwathHeight == 8) { + Request = REQ_256Bytes; + } else if (ScanOrientation == dm_vert) { + Request = REQ_128BytesContiguous; + } else { + Request = REQ_128BytesNonContiguous; + } + } else if (BytePerPixel == 8) { + if (TilingFormat == dm_sw_4kb_d || TilingFormat == dm_sw_4kb_d_x + || TilingFormat == dm_sw_var_d + || TilingFormat == dm_sw_var_d_x + || TilingFormat == dm_sw_64kb_d + || TilingFormat == dm_sw_64kb_d_x + || TilingFormat == dm_sw_64kb_d_t + || TilingFormat == dm_sw_64kb_r_x) { + if ((ScanOrientation == dm_vert && SwathHeight == 8) + || (ScanOrientation != dm_vert + && SwathHeight == 4)) { + Request = REQ_256Bytes; + } else if (ScanOrientation != dm_vert) { + Request = REQ_128BytesContiguous; + } else { + Request = REQ_128BytesNonContiguous; + } + } else { + if (ScanOrientation != dm_vert || SwathHeight == 8) { + Request = REQ_256Bytes; + } else { + Request = REQ_128BytesContiguous; + } + } + } + } + } else { + Request = REQ_NA; + } + + if (Request == REQ_256Bytes) { + *MaxUncompressedBlock = 256; + *MaxCompressedBlock = 256; + *Independent64ByteBlock = false; + MaximumDCCCompressionSurface = 4.0; + } else if (Request == REQ_128BytesContiguous) { + *MaxUncompressedBlock = 128; + *MaxCompressedBlock = 128; + *Independent64ByteBlock = false; + MaximumDCCCompressionSurface = 2.0; + } else if (Request == REQ_128BytesNonContiguous) { + *MaxUncompressedBlock = 256; + *MaxCompressedBlock = 64; + *Independent64ByteBlock = true; + MaximumDCCCompressionSurface = 4.0; + } else { + *MaxUncompressedBlock = 0; + *MaxCompressedBlock = 0; + *Independent64ByteBlock = 0; + MaximumDCCCompressionSurface = 0.0; + } + + return MaximumDCCCompressionSurface; +} + +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath) +{ + unsigned int MaxPartialSwath; + + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); + else + *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); + + if (!mode_lib->vba.IgnoreViewportPositioning) { + + *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) + % SwathHeight; + MaxPartialSwath = dml_max(1U, MaxPartialSwath); + + } else { + + if (ViewportYStart != 0) + dml_print( + "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); + + *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) + % SwathHeight; + } + + return *MaxNumSwath * SwathHeight + MaxPartialSwath; +} + +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + unsigned int SwathWidth, + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMaxPageTableLevels, + unsigned int HostVMCachedPageTableLevels, + unsigned int VMMPageSize, + unsigned int PTEBufferSizeInRequests, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + unsigned int *dpte_row_width_ub, + unsigned int *dpte_row_height, + unsigned int *MetaRequestWidth, + unsigned int *MetaRequestHeight, + unsigned int *meta_row_width, + unsigned int *meta_row_height, + unsigned int *vm_group_bytes, + long *dpte_group_bytes, + unsigned int *PixelPTEReqWidth, + unsigned int *PixelPTEReqHeight, + unsigned int *PTERequestSize, + unsigned int *DPDE0BytesFrame, + unsigned int *MetaPTEBytesFrame) +{ + unsigned int MPDEBytesFrame; + unsigned int DCCMetaSurfaceBytes; + unsigned int MacroTileSizeBytes; + unsigned int MacroTileHeight; + unsigned int ExtraDPDEBytesFrame; + unsigned int PDEAndMetaPTEBytesFrame; + unsigned int PixelPTEReqHeightPTEs; + + if (DCCEnable == true) { + *MetaRequestHeight = 8 * BlockHeight256Bytes; + *MetaRequestWidth = 8 * BlockWidth256Bytes; + if (ScanDirection == dm_horz) { + *meta_row_height = *MetaRequestHeight; + *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + + *MetaRequestWidth; + *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; + } else { + *meta_row_height = *MetaRequestWidth; + *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + + *MetaRequestHeight; + *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; + } + if (ScanDirection == dm_horz) { + DCCMetaSurfaceBytes = DCCMetaPitch + * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + + 64 * BlockHeight256Bytes) * BytePerPixel + / 256; + } else { + DCCMetaSurfaceBytes = DCCMetaPitch + * (dml_ceil( + (double) ViewportHeight - 1, + 64 * BlockHeight256Bytes) + + 64 * BlockHeight256Bytes) * BytePerPixel + / 256; + } + if (GPUVMEnable == true) { + *MetaPTEBytesFrame = (dml_ceil( + (double) (DCCMetaSurfaceBytes - VMMPageSize) + / (8 * VMMPageSize), + 1) + 1) * 64; + MPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 2); + } else { + *MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + } + } else { + *MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + *MetaRowByte = 0; + } + + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + MacroTileSizeBytes = 256; + MacroTileHeight = BlockHeight256Bytes; + } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x + || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) { + MacroTileSizeBytes = 4096; + MacroTileHeight = 4 * BlockHeight256Bytes; + } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t + || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d + || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x + || SurfaceTiling == dm_sw_64kb_r_x) { + MacroTileSizeBytes = 65536; + MacroTileHeight = 16 * BlockHeight256Bytes; + } else { + MacroTileSizeBytes = 262144; + MacroTileHeight = 32 * BlockHeight256Bytes; + } + *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; + + if (GPUVMEnable == true && (mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) > 2) { + if (ScanDirection == dm_horz) { + *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1); + } else { + *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1); + } + ExtraDPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 3); + } else { + *DPDE0BytesFrame = 0; + ExtraDPDEBytesFrame = 0; + } + + PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + + ExtraDPDEBytesFrame; + + if (HostVMEnable == true) { + PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels)); + } + + if (GPUVMEnable == true) { + double FractionOfPTEReturnDrop; + + if (SurfaceTiling == dm_sw_linear) { + PixelPTEReqHeightPTEs = 1; + *PixelPTEReqHeight = 1; + *PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel; + *PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } else if (MacroTileSizeBytes == 4096) { + PixelPTEReqHeightPTEs = 1; + *PixelPTEReqHeight = MacroTileHeight; + *PixelPTEReqWidth = 8 * *MacroTileWidth; + *PTERequestSize = 64; + if (ScanDirection == dm_horz) + FractionOfPTEReturnDrop = 0; + else + FractionOfPTEReturnDrop = 7 / 8; + } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) { + PixelPTEReqHeightPTEs = 16; + *PixelPTEReqHeight = 16 * BlockHeight256Bytes; + *PixelPTEReqWidth = 16 * BlockWidth256Bytes; + *PTERequestSize = 128; + FractionOfPTEReturnDrop = 0; + } else { + PixelPTEReqHeightPTEs = 1; + *PixelPTEReqHeight = MacroTileHeight; + *PixelPTEReqWidth = 8 * *MacroTileWidth; + *PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } + + if (SurfaceTiling == dm_sw_linear) { + *dpte_row_height = dml_min(128, + 1 << (unsigned int) dml_floor( + dml_log2( + (double) PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), + 1)); + *dpte_row_width_ub = (dml_ceil((double) (Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; + *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; + } else if (ScanDirection == dm_horz) { + *dpte_row_height = *PixelPTEReqHeight; + *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; + *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; + } else { + *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); + *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; + *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; + } + if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) + <= 64 * PTEBufferSizeInRequests) { + *PTEBufferSizeNotExceeded = true; + } else { + *PTEBufferSizeNotExceeded = false; + } + } else { + *PixelPTEBytesPerRow = 0; + *PTEBufferSizeNotExceeded = true; + } + dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %d\n", *MetaPTEBytesFrame); + + if (HostVMEnable == true) { + *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels)); + } + + if (HostVMEnable == true) { + *vm_group_bytes = 512; + *dpte_group_bytes = 512; + } else if (GPUVMEnable == true) { + *vm_group_bytes = 2048; + if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection != dm_horz) { + *dpte_group_bytes = 512; + } else { + *dpte_group_bytes = 2048; + } + } else { + *vm_group_bytes = 0; + *dpte_group_bytes = 0; + } + + return PDEAndMetaPTEBytesFrame; +} + +static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib) +{ + struct vba_vars_st *locals = &mode_lib->vba; + unsigned int j, k; + + mode_lib->vba.WritebackDISPCLK = 0.0; + mode_lib->vba.DISPCLKWithRamping = 0; + mode_lib->vba.DISPCLKWithoutRamping = 0; + mode_lib->vba.GlobalDPPCLK = 0.0; + + // DISPCLK and DPPCLK Calculation + // + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.WritebackEnable[k]) { + mode_lib->vba.WritebackDISPCLK = + dml_max( + mode_lib->vba.WritebackDISPCLK, + CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.WritebackChromaLineBufferWidth)); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.HRatio[k] > 1) { + locals->PSCL_THROUGHPUT_LUMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / dml_ceil( + mode_lib->vba.htaps[k] + / 6.0, + 1)); + } else { + locals->PSCL_THROUGHPUT_LUMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + + mode_lib->vba.DPPCLKUsingSingleDPPLuma = + mode_lib->vba.PixelClock[k] + * dml_max( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + dml_max( + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / locals->PSCL_THROUGHPUT_LUMA[k], + 1.0)); + + if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6) + && mode_lib->vba.DPPCLKUsingSingleDPPLuma + < 2 * mode_lib->vba.PixelClock[k]) { + mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k]; + } + + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + locals->PSCL_THROUGHPUT_CHROMA[k] = 0.0; + locals->DPPCLKUsingSingleDPP[k] = + mode_lib->vba.DPPCLKUsingSingleDPPLuma; + } else { + if (mode_lib->vba.HRatio[k] > 1) { + locals->PSCL_THROUGHPUT_CHROMA[k] = + dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / 2 + / dml_ceil( + mode_lib->vba.HTAPsChroma[k] + / 6.0, + 1.0)); + } else { + locals->PSCL_THROUGHPUT_CHROMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + mode_lib->vba.DPPCLKUsingSingleDPPChroma = + mode_lib->vba.PixelClock[k] + * dml_max( + mode_lib->vba.VTAPsChroma[k] + / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k] + / 2), + dml_max( + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / 4 + / locals->PSCL_THROUGHPUT_CHROMA[k], + 1.0)); + + if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6) + && mode_lib->vba.DPPCLKUsingSingleDPPChroma + < 2 * mode_lib->vba.PixelClock[k]) { + mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2 + * mode_lib->vba.PixelClock[k]; + } + + locals->DPPCLKUsingSingleDPP[k] = dml_max( + mode_lib->vba.DPPCLKUsingSingleDPPLuma, + mode_lib->vba.DPPCLKUsingSingleDPPChroma); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] != k) + continue; + if (mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DISPCLKWithRamping = + dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.PixelClock[k] / 2 + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100) + * (1 + + mode_lib->vba.DISPCLKRampingMargin + / 100)); + mode_lib->vba.DISPCLKWithoutRamping = + dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.PixelClock[k] / 2 + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100)); + } else if (!mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DISPCLKWithRamping = + dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.PixelClock[k] + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100) + * (1 + + mode_lib->vba.DISPCLKRampingMargin + / 100)); + mode_lib->vba.DISPCLKWithoutRamping = + dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.PixelClock[k] + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100)); + } + } + + mode_lib->vba.DISPCLKWithRamping = dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.WritebackDISPCLK); + mode_lib->vba.DISPCLKWithoutRamping = dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.WritebackDISPCLK); + + ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0); + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states - 1].dispclk_mhz, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { + mode_lib->vba.DISPCLK_calculated = + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity; + } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { + mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity; + } else { + mode_lib->vba.DISPCLK_calculated = + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity; + } + DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.DPPCLK_calculated[k] = locals->DPPCLKUsingSingleDPP[k] + / mode_lib->vba.DPPPerPlane[k] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); + mode_lib->vba.GlobalDPPCLK = dml_max( + mode_lib->vba.GlobalDPPCLK, + mode_lib->vba.DPPCLK_calculated[k]); + } + mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp( + mode_lib->vba.GlobalDPPCLK, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255 + * dml_ceil( + mode_lib->vba.DPPCLK_calculated[k] * 255 + / mode_lib->vba.GlobalDPPCLK, + 1); + DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]); + } + + // Urgent and B P-State/DRAM Clock Change Watermark + DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK); + DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN); + DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + bool MainPlaneDoesODMCombine = false; + + if (mode_lib->vba.SourceScan[k] == dm_horz) + locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k]; + else + locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; + + if (mode_lib->vba.ODMCombineEnabled[k] == true) + MainPlaneDoesODMCombine = true; + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) + if (mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.ODMCombineEnabled[j] == true) + MainPlaneDoesODMCombine = true; + + if (MainPlaneDoesODMCombine == true) + locals->SwathWidthY[k] = dml_min( + (double) locals->SwathWidthSingleDPPY[k], + dml_round( + mode_lib->vba.HActive[k] / 2.0 + * mode_lib->vba.HRatio[k])); + else + locals->SwathWidthY[k] = locals->SwathWidthSingleDPPY[k] + / mode_lib->vba.DPPPerPlane[k]; + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + locals->BytePerPixelDETY[k] = 8; + locals->BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + locals->BytePerPixelDETY[k] = 4; + locals->BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { + locals->BytePerPixelDETY[k] = 2; + locals->BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { + locals->BytePerPixelDETY[k] = 1; + locals->BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + locals->BytePerPixelDETY[k] = 1; + locals->BytePerPixelDETC[k] = 2; + } else { // dm_420_10 + locals->BytePerPixelDETY[k] = 4.0 / 3.0; + locals->BytePerPixelDETC[k] = 8.0 / 3.0; + } + } + + mode_lib->vba.TotalDataReadBandwidth = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + locals->ReadBandwidthPlaneLuma[k] = locals->SwathWidthSingleDPPY[k] + * dml_ceil(locals->BytePerPixelDETY[k], 1) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k]; + locals->ReadBandwidthPlaneChroma[k] = locals->SwathWidthSingleDPPY[k] + / 2 * dml_ceil(locals->BytePerPixelDETC[k], 2) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k] / 2; + DTRACE( + " read_bw[%i] = %fBps", + k, + locals->ReadBandwidthPlaneLuma[k] + + locals->ReadBandwidthPlaneChroma[k]); + mode_lib->vba.TotalDataReadBandwidth += locals->ReadBandwidthPlaneLuma[k] + + locals->ReadBandwidthPlaneChroma[k]; + } + + // DCFCLK Deep Sleep + CalculateDCFCLKDeepSleep( + mode_lib, + mode_lib->vba.NumberOfActivePlanes, + locals->BytePerPixelDETY, + locals->BytePerPixelDETC, + mode_lib->vba.VRatio, + locals->SwathWidthY, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.HRatio, + mode_lib->vba.PixelClock, + locals->PSCL_THROUGHPUT_LUMA, + locals->PSCL_THROUGHPUT_CHROMA, + locals->DPPCLK, + &mode_lib->vba.DCFCLKDeepSleep); + + // DSCCLK + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { + locals->DSCCLK_calculated[k] = 0.0; + } else { + if (mode_lib->vba.OutputFormat[k] == dm_420 + || mode_lib->vba.OutputFormat[k] == dm_n422) + mode_lib->vba.DSCFormatFactor = 2; + else + mode_lib->vba.DSCFormatFactor = 1; + if (mode_lib->vba.ODMCombineEnabled[k]) + locals->DSCCLK_calculated[k] = + mode_lib->vba.PixelClockBackEnd[k] / 6 + / mode_lib->vba.DSCFormatFactor + / (1 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100); + else + locals->DSCCLK_calculated[k] = + mode_lib->vba.PixelClockBackEnd[k] / 3 + / mode_lib->vba.DSCFormatFactor + / (1 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100); + } + } + + // DSC Delay + // TODO + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + double bpp = mode_lib->vba.OutputBpp[k]; + unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k]; + + if (mode_lib->vba.DSCEnabled[k] && bpp != 0) { + if (!mode_lib->vba.ODMCombineEnabled[k]) { + locals->DSCDelay[k] = + dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + bpp, + dml_ceil( + (double) mode_lib->vba.HActive[k] + / mode_lib->vba.NumberOfDSCSlices[k], + 1), + slices, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k]); + } else { + locals->DSCDelay[k] = + 2 + * (dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + bpp, + dml_ceil( + (double) mode_lib->vba.HActive[k] + / mode_lib->vba.NumberOfDSCSlices[k], + 1), + slices / 2.0, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k])); + } + locals->DSCDelay[k] = locals->DSCDelay[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.PixelClockBackEnd[k]; + } else { + locals->DSCDelay[k] = 0; + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes + if (j != k && mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.DSCEnabled[j]) + locals->DSCDelay[k] = locals->DSCDelay[j]; + + // Prefetch + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + unsigned int PDEAndMetaPTEBytesFrameY; + unsigned int PixelPTEBytesPerRowY; + unsigned int MetaRowByteY; + unsigned int MetaRowByteC; + unsigned int PDEAndMetaPTEBytesFrameC; + unsigned int PixelPTEBytesPerRowC; + bool PTEBufferSizeNotExceededY; + bool PTEBufferSizeNotExceededC; + + Calculate256BBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(locals->BytePerPixelDETY[k], 1), + dml_ceil(locals->BytePerPixelDETC[k], 2), + &locals->BlockHeight256BytesY[k], + &locals->BlockHeight256BytesC[k], + &locals->BlockWidth256BytesY[k], + &locals->BlockWidth256BytesC[k]); + + locals->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.vtaps[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.ViewportYStartY[k], + &locals->VInitPreFillY[k], + &locals->MaxNumSwathY[k]); + + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) { + PDEAndMetaPTEBytesFrameC = + CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + locals->BlockHeight256BytesC[k], + locals->BlockWidth256BytesC[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil( + locals->BytePerPixelDETC[k], + 2), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k] / 2, + mode_lib->vba.ViewportHeight[k] / 2, + locals->SwathWidthY[k] / 2, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMEnable, + mode_lib->vba.HostVMMaxPageTableLevels, + mode_lib->vba.HostVMCachedPageTableLevels, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsChroma, + mode_lib->vba.PitchC[k], + mode_lib->vba.DCCMetaPitchC[k], + &locals->MacroTileWidthC[k], + &MetaRowByteC, + &PixelPTEBytesPerRowC, + &PTEBufferSizeNotExceededC, + &locals->dpte_row_width_chroma_ub[k], + &locals->dpte_row_height_chroma[k], + &locals->meta_req_width_chroma[k], + &locals->meta_req_height_chroma[k], + &locals->meta_row_width_chroma[k], + &locals->meta_row_height_chroma[k], + &locals->vm_group_bytes_chroma, + &locals->dpte_group_bytes_chroma, + &locals->PixelPTEReqWidthC[k], + &locals->PixelPTEReqHeightC[k], + &locals->PTERequestSizeC[k], + &locals->dpde0_bytes_per_frame_ub_c[k], + &locals->meta_pte_bytes_per_frame_ub_c[k]); + + locals->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k] / 2, + mode_lib->vba.VTAPsChroma[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightC[k], + mode_lib->vba.ViewportYStartC[k], + &locals->VInitPreFillC[k], + &locals->MaxNumSwathC[k]); + } else { + PixelPTEBytesPerRowC = 0; + PDEAndMetaPTEBytesFrameC = 0; + MetaRowByteC = 0; + locals->MaxNumSwathC[k] = 0; + locals->PrefetchSourceLinesC[k] = 0; + locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; + } + + PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + locals->BlockHeight256BytesY[k], + locals->BlockWidth256BytesY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(locals->BytePerPixelDETY[k], 1), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k], + mode_lib->vba.ViewportHeight[k], + locals->SwathWidthY[k], + mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMEnable, + mode_lib->vba.HostVMMaxPageTableLevels, + mode_lib->vba.HostVMCachedPageTableLevels, + mode_lib->vba.VMMPageSize, + locals->PTEBufferSizeInRequestsForLuma, + mode_lib->vba.PitchY[k], + mode_lib->vba.DCCMetaPitchY[k], + &locals->MacroTileWidthY[k], + &MetaRowByteY, + &PixelPTEBytesPerRowY, + &PTEBufferSizeNotExceededY, + &locals->dpte_row_width_luma_ub[k], + &locals->dpte_row_height[k], + &locals->meta_req_width[k], + &locals->meta_req_height[k], + &locals->meta_row_width[k], + &locals->meta_row_height[k], + &locals->vm_group_bytes[k], + &locals->dpte_group_bytes[k], + &locals->PixelPTEReqWidthY[k], + &locals->PixelPTEReqHeightY[k], + &locals->PTERequestSizeY[k], + &locals->dpde0_bytes_per_frame_ub_l[k], + &locals->meta_pte_bytes_per_frame_ub_l[k]); + + locals->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; + locals->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + + PDEAndMetaPTEBytesFrameC; + locals->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; + + CalculateActiveRowBandwidth( + mode_lib->vba.GPUVMEnable, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + MetaRowByteY, + MetaRowByteC, + locals->meta_row_height[k], + locals->meta_row_height_chroma[k], + PixelPTEBytesPerRowY, + PixelPTEBytesPerRowC, + locals->dpte_row_height[k], + locals->dpte_row_height_chroma[k], + &locals->meta_row_bw[k], + &locals->dpte_row_bw[k]); + } + + mode_lib->vba.TotalDCCActiveDPP = 0; + mode_lib->vba.TotalActiveDPP = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + + mode_lib->vba.DPPPerPlane[k]; + if (mode_lib->vba.DCCEnable[k]) + mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + + mode_lib->vba.DPPPerPlane[k]; + } + + mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3( + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, + mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); + + mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency = + (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK + + mode_lib->vba.UrgentOutOfOrderReturnPerChannel + * mode_lib->vba.NumberOfChannels + / mode_lib->vba.ReturnBW; + + mode_lib->vba.UrgentExtraLatency = CalculateExtraLatency( + mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency, + mode_lib->vba.TotalActiveDPP, + mode_lib->vba.PixelChunkSizeInKByte, + mode_lib->vba.TotalDCCActiveDPP, + mode_lib->vba.MetaChunkSize, + mode_lib->vba.ReturnBW, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMEnable, + mode_lib->vba.NumberOfActivePlanes, + mode_lib->vba.DPPPerPlane, + locals->dpte_group_bytes, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + mode_lib->vba.HostVMMaxPageTableLevels, + mode_lib->vba.HostVMCachedPageTableLevels); + + + mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = + mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k]) + / mode_lib->vba.DISPCLK; + } else + locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[j] == k + && mode_lib->vba.WritebackEnable[j] == true) { + locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = + dml_max( + locals->WritebackDelay[mode_lib->vba.VoltageLevel][k], + mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[j], + mode_lib->vba.WritebackHRatio[j], + mode_lib->vba.WritebackVRatio[j], + mode_lib->vba.WritebackLumaHTaps[j], + mode_lib->vba.WritebackLumaVTaps[j], + mode_lib->vba.WritebackChromaHTaps[j], + mode_lib->vba.WritebackChromaVTaps[j], + mode_lib->vba.WritebackDestinationWidth[j]) + / mode_lib->vba.DISPCLK); + } + } + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) + if (mode_lib->vba.BlendingAndTiming[k] == j) + locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = + locals->WritebackDelay[mode_lib->vba.VoltageLevel][j]; + + mode_lib->vba.VStartupLines = 13; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + locals->MaxVStartupLines[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1)); + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + locals->MaximumMaxVStartupLines = dml_max(locals->MaximumMaxVStartupLines, locals->MaxVStartupLines[k]); + + // We don't really care to iterate between the various prefetch modes + //mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &mode_lib->vba.MinPrefetchMode, &mode_lib->vba.MaxPrefetchMode); + mode_lib->vba.UrgentLatency = dml_max3(mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.UrgentLatencyPixelMixedWithVMData, mode_lib->vba.UrgentLatencyVMDataOnly); + + do { + double MaxTotalRDBandwidth = 0; + double MaxTotalRDBandwidthNoUrgentBurst = 0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThan4 = false; + double TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + Pipe myPipe; + HostVM myHostVM; + + if (mode_lib->vba.XFCEnabled[k] == true) { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = + CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + locals->SwathWidthY[k], + dml_ceil( + locals->BytePerPixelDETY[k], + 1), + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TCalc, + TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + } else { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0; + } + + myPipe.DPPCLK = locals->DPPCLK[k]; + myPipe.DISPCLK = mode_lib->vba.DISPCLK; + myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + myPipe.DCFCLKDeepSleep = mode_lib->vba.DCFCLKDeepSleep; + myPipe.DPPPerPlane = mode_lib->vba.DPPPerPlane[k]; + myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + myPipe.SourceScan = mode_lib->vba.SourceScan[k]; + myPipe.BlockWidth256BytesY = locals->BlockWidth256BytesY[k]; + myPipe.BlockHeight256BytesY = locals->BlockHeight256BytesY[k]; + myPipe.BlockWidth256BytesC = locals->BlockWidth256BytesC[k]; + myPipe.BlockHeight256BytesC = locals->BlockHeight256BytesC[k]; + myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + myPipe.HTotal = mode_lib->vba.HTotal[k]; + + + myHostVM.Enable = mode_lib->vba.HostVMEnable; + myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels; + myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels; + + mode_lib->vba.ErrorResult[k] = + CalculatePrefetchSchedule( + mode_lib, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + &myPipe, + locals->DSCDelay[k], + mode_lib->vba.DPPCLKDelaySubtotal, + mode_lib->vba.DPPCLKDelaySCL, + mode_lib->vba.DPPCLKDelaySCLLBOnly, + mode_lib->vba.DPPCLKDelayCNVCFormater, + mode_lib->vba.DPPCLKDelayCNVCCursor, + mode_lib->vba.DISPCLKDelaySubtotal, + (unsigned int) (locals->SwathWidthY[k] + / mode_lib->vba.HRatio[k]), + mode_lib->vba.OutputFormat[k], + mode_lib->vba.MaxInterDCNTileRepeaters, + dml_min(mode_lib->vba.VStartupLines, locals->MaxVStartupLines[k]), + locals->MaxVStartupLines[k], + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + &myHostVM, + mode_lib->vba.DynamicMetadataEnable[k], + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], + mode_lib->vba.DynamicMetadataTransmittedBytes[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.UrgentLatency, + mode_lib->vba.UrgentExtraLatency, + mode_lib->vba.TCalc, + locals->PDEAndMetaPTEBytesFrame[k], + locals->MetaRowByte[k], + locals->PixelPTEBytesPerRow[k], + locals->PrefetchSourceLinesY[k], + locals->SwathWidthY[k], + locals->BytePerPixelDETY[k], + locals->VInitPreFillY[k], + locals->MaxNumSwathY[k], + locals->PrefetchSourceLinesC[k], + locals->BytePerPixelDETC[k], + locals->VInitPreFillC[k], + locals->MaxNumSwathC[k], + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.SwathHeightC[k], + TWait, + mode_lib->vba.XFCEnabled[k], + mode_lib->vba.XFCRemoteSurfaceFlipDelay, + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + &locals->DSTXAfterScaler[k], + &locals->DSTYAfterScaler[k], + &locals->DestinationLinesForPrefetch[k], + &locals->PrefetchBandwidth[k], + &locals->DestinationLinesToRequestVMInVBlank[k], + &locals->DestinationLinesToRequestRowInVBlank[k], + &locals->VRatioPrefetchY[k], + &locals->VRatioPrefetchC[k], + &locals->RequiredPrefetchPixDataBWLuma[k], + &locals->RequiredPrefetchPixDataBWChroma[k], + &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, + &locals->Tno_bw[k], + &locals->prefetch_vmrow_bw[k], + &locals->swath_width_luma_ub[k], + &locals->swath_width_chroma_ub[k], + &mode_lib->vba.VUpdateOffsetPix[k], + &mode_lib->vba.VUpdateWidthPix[k], + &mode_lib->vba.VReadyOffsetPix[k]); + if (mode_lib->vba.BlendingAndTiming[k] == k) { + locals->VStartup[k] = dml_min( + mode_lib->vba.VStartupLines, + locals->MaxVStartupLines[k]); + if (locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata + != 0) { + locals->VStartup[k] = + locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; + } + } else { + locals->VStartup[k] = + dml_min( + mode_lib->vba.VStartupLines, + locals->MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + unsigned int m; + + locals->cursor_bw[k] = 0; + locals->cursor_bw_pre[k] = 0; + for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) { + locals->cursor_bw[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; + locals->cursor_bw_pre[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPrefetchY[k]; + } + + CalculateUrgentBurstFactor( + mode_lib->vba.DETBufferSizeInKByte, + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.SwathHeightC[k], + locals->SwathWidthY[k], + mode_lib->vba.HTotal[k] / + mode_lib->vba.PixelClock[k], + mode_lib->vba.UrgentLatency, + mode_lib->vba.CursorBufferSize, + mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1], + dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]), + mode_lib->vba.VRatio[k], + locals->VRatioPrefetchY[k], + locals->VRatioPrefetchC[k], + locals->BytePerPixelDETY[k], + locals->BytePerPixelDETC[k], + &locals->UrgentBurstFactorCursor[k], + &locals->UrgentBurstFactorCursorPre[k], + &locals->UrgentBurstFactorLuma[k], + &locals->UrgentBurstFactorLumaPre[k], + &locals->UrgentBurstFactorChroma[k], + &locals->UrgentBurstFactorChromaPre[k], + &locals->NotEnoughUrgentLatencyHiding, + &locals->NotEnoughUrgentLatencyHidingPre); + + if (mode_lib->vba.UseUrgentBurstBandwidth == false) { + locals->UrgentBurstFactorLuma[k] = 1; + locals->UrgentBurstFactorChroma[k] = 1; + locals->UrgentBurstFactorCursor[k] = 1; + locals->UrgentBurstFactorLumaPre[k] = 1; + locals->UrgentBurstFactorChromaPre[k] = 1; + locals->UrgentBurstFactorCursorPre[k] = 1; + } + + MaxTotalRDBandwidth = MaxTotalRDBandwidth + + dml_max3(locals->prefetch_vmrow_bw[k], + locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k] + + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] + * locals->UrgentBurstFactorCursor[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k], + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixDataBWChroma[k] + * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); + + MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst + + dml_max3(locals->prefetch_vmrow_bw[k], + locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k] + + locals->meta_row_bw[k] + locals->dpte_row_bw[k], + locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]); + + if (locals->DestinationLinesForPrefetch[k] < 2) + DestinationLineTimesForPrefetchLessThan2 = true; + if (locals->VRatioPrefetchY[k] > 4 || locals->VRatioPrefetchC[k] > 4) + VRatioPrefetchMoreThan4 = true; + } + mode_lib->vba.FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW; + + if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && locals->NotEnoughUrgentLatencyHiding == 0 && locals->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4 + && !DestinationLineTimesForPrefetchLessThan2) + mode_lib->vba.PrefetchModeSupported = true; + else { + mode_lib->vba.PrefetchModeSupported = false; + dml_print( + "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n"); + } + + if (mode_lib->vba.PrefetchModeSupported == true) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.BandwidthAvailableForImmediateFlip + - dml_max( + locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k] + + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] + + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + + locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] + + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); + } + + mode_lib->vba.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes + locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k] + locals->PixelPTEBytesPerRow[k]; + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + CalculateFlipSchedule( + mode_lib, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + mode_lib->vba.UrgentExtraLatency, + mode_lib->vba.UrgentLatency, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.HostVMEnable, + mode_lib->vba.HostVMMaxPageTableLevels, + mode_lib->vba.HostVMCachedPageTableLevels, + mode_lib->vba.GPUVMEnable, + locals->PDEAndMetaPTEBytesFrame[k], + locals->MetaRowByte[k], + locals->PixelPTEBytesPerRow[k], + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.VRatio[k], + locals->Tno_bw[k], + mode_lib->vba.DCCEnable[k], + locals->dpte_row_height[k], + locals->meta_row_height[k], + locals->dpte_row_height_chroma[k], + locals->meta_row_height_chroma[k], + &locals->DestinationLinesToRequestVMInImmediateFlip[k], + &locals->DestinationLinesToRequestRowInImmediateFlip[k], + &locals->final_flip_bw[k], + &locals->ImmediateFlipSupportedForPipe[k]); + } + mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; + mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.total_dcn_read_bw_with_flip = + mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3( + locals->prefetch_vmrow_bw[k], + locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] + + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], + locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + + locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] + + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); + mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = + mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst + + dml_max3(locals->prefetch_vmrow_bw[k], + locals->final_flip_bw[k] + locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k], + locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]); + + } + mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip = mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst / mode_lib->vba.ReturnBW; + + mode_lib->vba.ImmediateFlipSupported = true; + if (mode_lib->vba.total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) { + mode_lib->vba.ImmediateFlipSupported = false; + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (locals->ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->vba.ImmediateFlipSupported = false; + } + } + } else { + mode_lib->vba.ImmediateFlipSupported = false; + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ErrorResult[k]) { + mode_lib->vba.PrefetchModeSupported = false; + dml_print( + "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n"); + } + } + + mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1; + } while (!((mode_lib->vba.PrefetchModeSupported + && ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable) + || mode_lib->vba.ImmediateFlipSupported)) + || locals->MaximumMaxVStartupLines < mode_lib->vba.VStartupLines)); + + //Watermarks and NB P-State/DRAM Clock Change Support + { + enum clock_change_support DRAMClockChangeSupport; // dummy + CalculateWatermarksAndDRAMSpeedChangeSupport( + mode_lib, + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.NumberOfActivePlanes, + mode_lib->vba.MaxLineBufferLines, + mode_lib->vba.LineBufferSize, + mode_lib->vba.DPPOutputBufferPixels, + mode_lib->vba.DETBufferSizeInKByte, + mode_lib->vba.WritebackInterfaceLumaBufferSize, + mode_lib->vba.WritebackInterfaceChromaBufferSize, + mode_lib->vba.DCFCLK, + mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels, + mode_lib->vba.ReturnBW, + mode_lib->vba.GPUVMEnable, + locals->dpte_group_bytes, + mode_lib->vba.MetaChunkSize, + mode_lib->vba.UrgentLatency, + mode_lib->vba.UrgentExtraLatency, + mode_lib->vba.WritebackLatency, + mode_lib->vba.WritebackChunkSize, + mode_lib->vba.SOCCLK, + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.SRExitTime, + mode_lib->vba.SREnterPlusExitTime, + mode_lib->vba.DCFCLKDeepSleep, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.DCCEnable, + locals->DPPCLK, + locals->SwathWidthSingleDPPY, + mode_lib->vba.SwathHeightY, + locals->ReadBandwidthPlaneLuma, + mode_lib->vba.SwathHeightC, + locals->ReadBandwidthPlaneChroma, + mode_lib->vba.LBBitPerPixel, + locals->SwathWidthY, + mode_lib->vba.HRatio, + mode_lib->vba.vtaps, + mode_lib->vba.VTAPsChroma, + mode_lib->vba.VRatio, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.BlendingAndTiming, + locals->BytePerPixelDETY, + locals->BytePerPixelDETC, + mode_lib->vba.WritebackEnable, + mode_lib->vba.WritebackPixelFormat, + mode_lib->vba.WritebackDestinationWidth, + mode_lib->vba.WritebackDestinationHeight, + mode_lib->vba.WritebackSourceHeight, + &DRAMClockChangeSupport, + &mode_lib->vba.UrgentWatermark, + &mode_lib->vba.WritebackUrgentWatermark, + &mode_lib->vba.DRAMClockChangeWatermark, + &mode_lib->vba.WritebackDRAMClockChangeWatermark, + &mode_lib->vba.StutterExitWatermark, + &mode_lib->vba.StutterEnterPlusExitWatermark, + &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported); + } + + + //Display Pipeline Delivery Time in Prefetch, Groups + CalculatePixelDeliveryTimes( + mode_lib->vba.NumberOfActivePlanes, + mode_lib->vba.VRatio, + locals->VRatioPrefetchY, + locals->VRatioPrefetchC, + locals->swath_width_luma_ub, + locals->swath_width_chroma_ub, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.HRatio, + mode_lib->vba.PixelClock, + locals->PSCL_THROUGHPUT_LUMA, + locals->PSCL_THROUGHPUT_CHROMA, + locals->DPPCLK, + locals->BytePerPixelDETC, + mode_lib->vba.SourceScan, + locals->BlockWidth256BytesY, + locals->BlockHeight256BytesY, + locals->BlockWidth256BytesC, + locals->BlockHeight256BytesC, + locals->DisplayPipeLineDeliveryTimeLuma, + locals->DisplayPipeLineDeliveryTimeChroma, + locals->DisplayPipeLineDeliveryTimeLumaPrefetch, + locals->DisplayPipeLineDeliveryTimeChromaPrefetch, + locals->DisplayPipeRequestDeliveryTimeLuma, + locals->DisplayPipeRequestDeliveryTimeChroma, + locals->DisplayPipeRequestDeliveryTimeLumaPrefetch, + locals->DisplayPipeRequestDeliveryTimeChromaPrefetch); + + CalculateMetaAndPTETimes( + mode_lib->vba.NumberOfActivePlanes, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.MetaChunkSize, + mode_lib->vba.MinMetaChunkSizeBytes, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.HTotal, + mode_lib->vba.VRatio, + locals->VRatioPrefetchY, + locals->VRatioPrefetchC, + locals->DestinationLinesToRequestRowInVBlank, + locals->DestinationLinesToRequestRowInImmediateFlip, + locals->DestinationLinesToRequestVMInVBlank, + locals->DestinationLinesToRequestVMInImmediateFlip, + mode_lib->vba.DCCEnable, + mode_lib->vba.PixelClock, + locals->BytePerPixelDETY, + locals->BytePerPixelDETC, + mode_lib->vba.SourceScan, + locals->dpte_row_height, + locals->dpte_row_height_chroma, + locals->meta_row_width, + locals->meta_row_height, + locals->meta_req_width, + locals->meta_req_height, + locals->dpte_group_bytes, + locals->PTERequestSizeY, + locals->PTERequestSizeC, + locals->PixelPTEReqWidthY, + locals->PixelPTEReqHeightY, + locals->PixelPTEReqWidthC, + locals->PixelPTEReqHeightC, + locals->dpte_row_width_luma_ub, + locals->dpte_row_width_chroma_ub, + locals->vm_group_bytes, + locals->dpde0_bytes_per_frame_ub_l, + locals->dpde0_bytes_per_frame_ub_c, + locals->meta_pte_bytes_per_frame_ub_l, + locals->meta_pte_bytes_per_frame_ub_c, + locals->DST_Y_PER_PTE_ROW_NOM_L, + locals->DST_Y_PER_PTE_ROW_NOM_C, + locals->DST_Y_PER_META_ROW_NOM_L, + locals->TimePerMetaChunkNominal, + locals->TimePerMetaChunkVBlank, + locals->TimePerMetaChunkFlip, + locals->time_per_pte_group_nom_luma, + locals->time_per_pte_group_vblank_luma, + locals->time_per_pte_group_flip_luma, + locals->time_per_pte_group_nom_chroma, + locals->time_per_pte_group_vblank_chroma, + locals->time_per_pte_group_flip_chroma, + locals->TimePerVMGroupVBlank, + locals->TimePerVMGroupFlip, + locals->TimePerVMRequestVBlank, + locals->TimePerVMRequestFlip); + + + // Min TTUVBlank + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + locals->AllowDRAMClockChangeDuringVBlank[k] = true; + locals->AllowDRAMSelfRefreshDuringVBlank[k] = true; + locals->MinTTUVBlank[k] = dml_max( + mode_lib->vba.DRAMClockChangeWatermark, + dml_max( + mode_lib->vba.StutterEnterPlusExitWatermark, + mode_lib->vba.UrgentWatermark)); + } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) { + locals->AllowDRAMClockChangeDuringVBlank[k] = false; + locals->AllowDRAMSelfRefreshDuringVBlank[k] = true; + locals->MinTTUVBlank[k] = dml_max( + mode_lib->vba.StutterEnterPlusExitWatermark, + mode_lib->vba.UrgentWatermark); + } else { + locals->AllowDRAMClockChangeDuringVBlank[k] = false; + locals->AllowDRAMSelfRefreshDuringVBlank[k] = false; + locals->MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark; + } + if (!mode_lib->vba.DynamicMetadataEnable[k]) + locals->MinTTUVBlank[k] = mode_lib->vba.TCalc + + locals->MinTTUVBlank[k]; + } + + // DCC Configuration + mode_lib->vba.ActiveDPPs = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + locals->MaximumDCCCompressionYSurface[k] = CalculateDCCConfiguration( + mode_lib->vba.DCCEnable[k], + false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, + mode_lib->vba.ViewportWidth[k], + mode_lib->vba.ViewportHeight[k], + mode_lib->vba.DETBufferSizeInKByte * 1024, + locals->BlockHeight256BytesY[k], + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.SurfaceTiling[k], + locals->BytePerPixelDETY[k], + mode_lib->vba.SourceScan[k], + &locals->DCCYMaxUncompressedBlock[k], + &locals->DCCYMaxCompressedBlock[k], + &locals->DCCYIndependent64ByteBlock[k]); + } + + //XFC Parameters: + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.XFCEnabled[k] == true) { + double TWait; + + locals->XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset; + locals->XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth; + locals->XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset; + TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + locals->SwathWidthY[k], + dml_ceil(locals->BytePerPixelDETY[k], 1), + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TCalc, + TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + locals->XFCRemoteSurfaceFlipLatency[k] = + dml_floor( + mode_lib->vba.XFCRemoteSurfaceFlipDelay + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + locals->XFCTransferDelay[k] = + dml_ceil( + mode_lib->vba.XFCBusTransportTime + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + locals->XFCPrechargeDelay[k] = + dml_ceil( + (mode_lib->vba.XFCBusTransportTime + + mode_lib->vba.TInitXFill + + mode_lib->vba.TslvChk) + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance + * mode_lib->vba.SrcActiveDrainRate; + mode_lib->vba.FinalFillMargin = + (locals->DestinationLinesToRequestVMInVBlank[k] + + locals->DestinationLinesToRequestRowInVBlank[k]) + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k] + * mode_lib->vba.SrcActiveDrainRate + + mode_lib->vba.XFCFillConstant; + mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay + * mode_lib->vba.SrcActiveDrainRate + + mode_lib->vba.FinalFillMargin; + mode_lib->vba.RemainingFillLevel = dml_max( + 0.0, + mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel); + mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel + / (mode_lib->vba.SrcActiveDrainRate + * mode_lib->vba.XFCFillBWOverhead / 100); + locals->XFCPrefetchMargin[k] = + mode_lib->vba.XFCRemoteSurfaceFlipDelay + + mode_lib->vba.TFinalxFill + + (locals->DestinationLinesToRequestVMInVBlank[k] + + locals->DestinationLinesToRequestRowInVBlank[k]) + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } else { + locals->XFCSlaveVUpdateOffset[k] = 0; + locals->XFCSlaveVupdateWidth[k] = 0; + locals->XFCSlaveVReadyOffset[k] = 0; + locals->XFCRemoteSurfaceFlipLatency[k] = 0; + locals->XFCPrechargeDelay[k] = 0; + locals->XFCTransferDelay[k] = 0; + locals->XFCPrefetchMargin[k] = 0; + } + } + + // Stutter Efficiency + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + CalculateDETBufferSize( + mode_lib->vba.DETBufferSizeInKByte, + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.SwathHeightC[k], + &locals->DETBufferSizeY[k], + &locals->DETBufferSizeC[k]); + + locals->LinesInDETY[k] = locals->DETBufferSizeY[k] + / locals->BytePerPixelDETY[k] / locals->SwathWidthY[k]; + locals->LinesInDETYRoundedDownToSwath[k] = dml_floor( + locals->LinesInDETY[k], + mode_lib->vba.SwathHeightY[k]); + locals->FullDETBufferingTimeY[k] = + locals->LinesInDETYRoundedDownToSwath[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k]; + } + + mode_lib->vba.StutterPeriod = 999999.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (locals->FullDETBufferingTimeY[k] < mode_lib->vba.StutterPeriod) { + mode_lib->vba.StutterPeriod = locals->FullDETBufferingTimeY[k]; + mode_lib->vba.FrameTimeForMinFullDETBufferingTime = + (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + locals->BytePerPixelYCriticalPlane = dml_ceil(locals->BytePerPixelDETY[k], 1); + locals->SwathWidthYCriticalPlane = locals->SwathWidthY[k]; + locals->LinesToFinishSwathTransferStutterCriticalPlane = + mode_lib->vba.SwathHeightY[k] - (locals->LinesInDETY[k] - locals->LinesInDETYRoundedDownToSwath[k]); + } + } + + mode_lib->vba.AverageReadBandwidth = 0.0; + mode_lib->vba.TotalRowReadBandwidth = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + unsigned int DCCRateLimit; + + if (mode_lib->vba.DCCEnable[k]) { + if (locals->DCCYMaxCompressedBlock[k] == 256) + DCCRateLimit = 4; + else + DCCRateLimit = 2; + + mode_lib->vba.AverageReadBandwidth = + mode_lib->vba.AverageReadBandwidth + + (locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k]) / + dml_min(mode_lib->vba.DCCRate[k], DCCRateLimit); + } else { + mode_lib->vba.AverageReadBandwidth = + mode_lib->vba.AverageReadBandwidth + + locals->ReadBandwidthPlaneLuma[k] + + locals->ReadBandwidthPlaneChroma[k]; + } + mode_lib->vba.TotalRowReadBandwidth = mode_lib->vba.TotalRowReadBandwidth + + locals->meta_row_bw[k] + locals->dpte_row_bw[k]; + } + + mode_lib->vba.AverageDCCCompressionRate = mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.AverageReadBandwidth; + + mode_lib->vba.PartOfBurstThatFitsInROB = + dml_min( + mode_lib->vba.StutterPeriod + * mode_lib->vba.TotalDataReadBandwidth, + mode_lib->vba.ROBBufferSizeInKByte * 1024 + * mode_lib->vba.AverageDCCCompressionRate); + mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB + / mode_lib->vba.AverageDCCCompressionRate / mode_lib->vba.ReturnBW + + (mode_lib->vba.StutterPeriod * mode_lib->vba.TotalDataReadBandwidth + - mode_lib->vba.PartOfBurstThatFitsInROB) + / (mode_lib->vba.DCFCLK * 64) + + mode_lib->vba.StutterPeriod * mode_lib->vba.TotalRowReadBandwidth / mode_lib->vba.ReturnBW; + mode_lib->vba.StutterBurstTime = dml_max( + mode_lib->vba.StutterBurstTime, + (locals->LinesToFinishSwathTransferStutterCriticalPlane * locals->BytePerPixelYCriticalPlane * + locals->SwathWidthYCriticalPlane / mode_lib->vba.ReturnBW) + ); + + mode_lib->vba.TotalActiveWriteback = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1; + } + } + + if (mode_lib->vba.TotalActiveWriteback == 0) { + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1 + - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime) + / mode_lib->vba.StutterPeriod) * 100; + } else { + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0; + } + + mode_lib->vba.SmallestVBlank = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { + mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.VBlankTime = 0; + } + mode_lib->vba.SmallestVBlank = dml_min( + mode_lib->vba.SmallestVBlank, + mode_lib->vba.VBlankTime); + } + + mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100 + * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime + - mode_lib->vba.SmallestVBlank) + + mode_lib->vba.SmallestVBlank) + / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100; +} + +static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) +{ + // Display Pipe Configuration + double BytePerPixDETY; + double BytePerPixDETC; + double Read256BytesBlockHeightY; + double Read256BytesBlockHeightC; + double Read256BytesBlockWidthY; + double Read256BytesBlockWidthC; + double MaximumSwathHeightY; + double MaximumSwathHeightC; + double MinimumSwathHeightY; + double MinimumSwathHeightC; + double SwathWidth; + double SwathWidthGranularityY; + double SwathWidthGranularityC; + double RoundedUpMaxSwathSizeBytesY; + double RoundedUpMaxSwathSizeBytesC; + unsigned int j, k; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + bool MainPlaneDoesODMCombine = false; + + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + BytePerPixDETY = 8; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + BytePerPixDETY = 4; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + BytePerPixDETY = 2; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { + BytePerPixDETY = 1; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + BytePerPixDETY = 1; + BytePerPixDETC = 2; + } else { + BytePerPixDETY = 4.0 / 3.0; + BytePerPixDETC = 8.0 / 3.0; + } + + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + Read256BytesBlockHeightY = 1; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + Read256BytesBlockHeightY = 4; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + Read256BytesBlockHeightY = 8; + } else { + Read256BytesBlockHeightY = 16; + } + Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) + / Read256BytesBlockHeightY; + Read256BytesBlockHeightC = 0; + Read256BytesBlockWidthC = 0; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + Read256BytesBlockHeightY = 1; + Read256BytesBlockHeightC = 1; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + Read256BytesBlockHeightY = 16; + Read256BytesBlockHeightC = 8; + } else { + Read256BytesBlockHeightY = 8; + Read256BytesBlockHeightC = 8; + } + Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) + / Read256BytesBlockHeightY; + Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2) + / Read256BytesBlockHeightC; + } + + if (mode_lib->vba.SourceScan[k] == dm_horz) { + MaximumSwathHeightY = Read256BytesBlockHeightY; + MaximumSwathHeightC = Read256BytesBlockHeightC; + } else { + MaximumSwathHeightY = Read256BytesBlockWidthY; + MaximumSwathHeightC = Read256BytesBlockWidthC; + } + + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_t + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s_x) + && mode_lib->vba.SourceScan[k] == dm_horz)) { + MinimumSwathHeightY = MaximumSwathHeightY; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 + && mode_lib->vba.SourceScan[k] != dm_horz) { + MinimumSwathHeightY = MaximumSwathHeightY; + } else { + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; + } + MinimumSwathHeightC = MaximumSwathHeightC; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + MinimumSwathHeightY = MaximumSwathHeightY; + MinimumSwathHeightC = MaximumSwathHeightC; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 + && mode_lib->vba.SourceScan[k] == dm_horz) { + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; + MinimumSwathHeightC = MaximumSwathHeightC; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + && mode_lib->vba.SourceScan[k] == dm_horz) { + MinimumSwathHeightC = MaximumSwathHeightC / 2.0; + MinimumSwathHeightY = MaximumSwathHeightY; + } else { + MinimumSwathHeightY = MaximumSwathHeightY; + MinimumSwathHeightC = MaximumSwathHeightC; + } + } + + if (mode_lib->vba.SourceScan[k] == dm_horz) { + SwathWidth = mode_lib->vba.ViewportWidth[k]; + } else { + SwathWidth = mode_lib->vba.ViewportHeight[k]; + } + + if (mode_lib->vba.ODMCombineEnabled[k] == true) { + MainPlaneDoesODMCombine = true; + } + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.ODMCombineEnabled[j] == true) { + MainPlaneDoesODMCombine = true; + } + } + + if (MainPlaneDoesODMCombine == true) { + SwathWidth = dml_min( + SwathWidth, + mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]); + } else { + SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k]; + } + + SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY; + RoundedUpMaxSwathSizeBytesY = (dml_ceil( + (double) (SwathWidth - 1), + SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY + * MaximumSwathHeightY; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256) + + 256; + } + if (MaximumSwathHeightC > 0) { + SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2) + / MaximumSwathHeightC; + RoundedUpMaxSwathSizeBytesC = (dml_ceil( + (double) (SwathWidth / 2.0 - 1), + SwathWidthGranularityC) + SwathWidthGranularityC) + * BytePerPixDETC * MaximumSwathHeightC; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesC = dml_ceil( + RoundedUpMaxSwathSizeBytesC, + 256) + 256; + } + } else + RoundedUpMaxSwathSizeBytesC = 0.0; + + if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC + <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) { + mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY; + mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC; + } else { + mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY; + mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC; + } + + CalculateDETBufferSize( + mode_lib->vba.DETBufferSizeInKByte, + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.SwathHeightC[k], + &mode_lib->vba.DETBufferSizeY[k], + &mode_lib->vba.DETBufferSizeC[k]); + } +} + +static double CalculateTWait( + unsigned int PrefetchMode, + double DRAMClockChangeLatency, + double UrgentLatency, + double SREnterPlusExitTime) +{ + if (PrefetchMode == 0) { + return dml_max( + DRAMClockChangeLatency + UrgentLatency, + dml_max(SREnterPlusExitTime, UrgentLatency)); + } else if (PrefetchMode == 1) { + return dml_max(SREnterPlusExitTime, UrgentLatency); + } else { + return UrgentLatency; + } +} + +static double CalculateRemoteSurfaceFlipDelay( + struct display_mode_lib *mode_lib, + double VRatio, + double SwathWidth, + double Bpp, + double LineTime, + double XFCTSlvVupdateOffset, + double XFCTSlvVupdateWidth, + double XFCTSlvVreadyOffset, + double XFCXBUFLatencyTolerance, + double XFCFillBWOverhead, + double XFCSlvChunkSize, + double XFCBusTransportTime, + double TCalc, + double TWait, + double *SrcActiveDrainRate, + double *TInitXFill, + double *TslvChk) +{ + double TSlvSetup, AvgfillRate, result; + + *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime; + TSlvSetup = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset; + *TInitXFill = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100); + AvgfillRate = *SrcActiveDrainRate * (1 + XFCFillBWOverhead / 100); + *TslvChk = XFCSlvChunkSize / AvgfillRate; + dml_print( + "DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n", + *SrcActiveDrainRate); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", TSlvSetup); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", AvgfillRate); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk); + result = 2 * XFCBusTransportTime + TSlvSetup + TCalc + TWait + *TslvChk + *TInitXFill; // TODO: This doesn't seem to match programming guide + dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", result); + return result; +} + +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackLumaHTaps, + unsigned int WritebackLumaVTaps, + unsigned int WritebackChromaHTaps, + unsigned int WritebackChromaVTaps, + unsigned int WritebackDestinationWidth) +{ + double CalculateWriteBackDelay = + dml_max( + dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio, + WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1) + * dml_ceil( + WritebackDestinationWidth + / 4.0, + 1) + + dml_ceil(1.0 / WritebackVRatio, 1) + * (dml_ceil( + WritebackLumaVTaps + / 4.0, + 1) + 4)); + + if (WritebackPixelFormat != dm_444_32) { + CalculateWriteBackDelay = + dml_max( + CalculateWriteBackDelay, + dml_max( + dml_ceil( + WritebackChromaHTaps + / 2.0, + 1) + / (2 + * WritebackHRatio), + WritebackChromaVTaps + * dml_ceil( + 1 + / (2 + * WritebackVRatio), + 1) + * dml_ceil( + WritebackDestinationWidth + / 2.0 + / 2.0, + 1) + + dml_ceil( + 1 + / (2 + * WritebackVRatio), + 1) + * (dml_ceil( + WritebackChromaVTaps + / 4.0, + 1) + + 4))); + } + return CalculateWriteBackDelay; +} + +static void CalculateActiveRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw) +{ + if (DCCEnable != true) { + *meta_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + + VRatio / 2 * MetaRowByteChroma + / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatio / 2 * PixelPTEBytesPerRowChroma + / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } +} + +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + double UrgentExtraLatency, + double UrgentLatency, + unsigned int GPUVMMaxPageTableLevels, + bool HostVMEnable, + unsigned int HostVMMaxPageTableLevels, + unsigned int HostVMCachedPageTableLevels, + bool GPUVMEnable, + double PDEAndMetaPTEBytesPerFrame, + double MetaRowBytes, + double DPTEBytesPerRow, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + double LineTime, + double VRatio, + double Tno_bw, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + unsigned int dpte_row_height_chroma, + unsigned int meta_row_height_chroma, + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe) +{ + double min_row_time = 0.0; + unsigned int HostVMDynamicLevels; + double TimeForFetchingMetaPTEImmediateFlip; + double TimeForFetchingRowInVBlankImmediateFlip; + double ImmediateFlipBW; + double HostVMInefficiencyFactor; + + if (GPUVMEnable == true && HostVMEnable == true) { + HostVMInefficiencyFactor = + PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData + / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; + HostVMDynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels; + } else { + HostVMInefficiencyFactor = 1; + HostVMDynamicLevels = 0; + } + + ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) + * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; + + if (GPUVMEnable == true) { + TimeForFetchingMetaPTEImmediateFlip = dml_max3( + Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, + UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevels + 1) - 1), + LineTime / 4.0); + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + } + + *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; + if ((GPUVMEnable == true || DCCEnable == true)) { + TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevels + 1), LineTime / 4); + } else { + TimeForFetchingRowInVBlankImmediateFlip = 0; + } + + *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; + *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), (MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); + if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + if (GPUVMEnable == true && DCCEnable != true) { + min_row_time = dml_min( + dpte_row_height * LineTime / VRatio, + dpte_row_height_chroma * LineTime / (VRatio / 2)); + } else if (GPUVMEnable != true && DCCEnable == true) { + min_row_time = dml_min( + meta_row_height * LineTime / VRatio, + meta_row_height_chroma * LineTime / (VRatio / 2)); + } else { + min_row_time = dml_min4( + dpte_row_height * LineTime / VRatio, + meta_row_height * LineTime / VRatio, + dpte_row_height_chroma * LineTime / (VRatio / 2), + meta_row_height_chroma * LineTime / (VRatio / 2)); + } + } else { + if (GPUVMEnable == true && DCCEnable != true) { + min_row_time = dpte_row_height * LineTime / VRatio; + } else if (GPUVMEnable != true && DCCEnable == true) { + min_row_time = meta_row_height * LineTime / VRatio; + } else { + min_row_time = dml_min( + dpte_row_height * LineTime / VRatio, + meta_row_height * LineTime / VRatio); + } + } + + if (*DestinationLinesToRequestVMInImmediateFlip >= 32 + || *DestinationLinesToRequestRowInImmediateFlip >= 16 + || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { + *ImmediateFlipSupportedForPipe = false; + } else { + *ImmediateFlipSupportedForPipe = true; + } +} + +static unsigned int TruncToValidBPP( + double DecimalBPP, + double DesiredBPP, + bool DSCEnabled, + enum output_encoder_class Output, + enum output_format_class Format, + unsigned int DSCInputBitPerComponent) +{ + if (Output == dm_hdmi) { + if (Format == dm_420) { + if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18)) + return 18; + else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15)) + return 15; + else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12)) + return 12; + else + return BPP_INVALID; + } else if (Format == dm_444) { + if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36)) + return 36; + else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30)) + return 30; + else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24)) + return 24; + else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18)) + return 18; + else + return BPP_INVALID; + } else { + if (DecimalBPP / 1.5 >= 24 && (DesiredBPP == 0 || DesiredBPP == 24)) + return 24; + else if (DecimalBPP / 1.5 >= 20 && (DesiredBPP == 0 || DesiredBPP == 20)) + return 20; + else if (DecimalBPP / 1.5 >= 16 && (DesiredBPP == 0 || DesiredBPP == 16)) + return 16; + else + return BPP_INVALID; + } + } else { + if (DSCEnabled) { + if (Format == dm_420) { + if (DesiredBPP == 0) { + if (DecimalBPP < 6) + return BPP_INVALID; + else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1.0 / 16.0) + return 1.5 * DSCInputBitPerComponent - 1.0 / 16.0; + else + return dml_floor(16 * DecimalBPP, 1) / 16.0; + } else { + if (DecimalBPP < 6 + || DesiredBPP < 6 + || DesiredBPP > 1.5 * DSCInputBitPerComponent - 1.0 / 16.0 + || DecimalBPP < DesiredBPP) { + return BPP_INVALID; + } else { + return DesiredBPP; + } + } + } else if (Format == dm_n422) { + if (DesiredBPP == 0) { + if (DecimalBPP < 7) + return BPP_INVALID; + else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1.0 / 16.0) + return 2 * DSCInputBitPerComponent - 1.0 / 16.0; + else + return dml_floor(16 * DecimalBPP, 1) / 16.0; + } else { + if (DecimalBPP < 7 + || DesiredBPP < 7 + || DesiredBPP > 2 * DSCInputBitPerComponent - 1.0 / 16.0 + || DecimalBPP < DesiredBPP) { + return BPP_INVALID; + } else { + return DesiredBPP; + } + } + } else { + if (DesiredBPP == 0) { + if (DecimalBPP < 8) + return BPP_INVALID; + else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1.0 / 16.0) + return 3 * DSCInputBitPerComponent - 1.0 / 16.0; + else + return dml_floor(16 * DecimalBPP, 1) / 16.0; + } else { + if (DecimalBPP < 8 + || DesiredBPP < 8 + || DesiredBPP > 3 * DSCInputBitPerComponent - 1.0 / 16.0 + || DecimalBPP < DesiredBPP) { + return BPP_INVALID; + } else { + return DesiredBPP; + } + } + } + } else if (Format == dm_420) { + if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18)) + return 18; + else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15)) + return 15; + else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12)) + return 12; + else + return BPP_INVALID; + } else if (Format == dm_s422 || Format == dm_n422) { + if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24)) + return 24; + else if (DecimalBPP >= 20 && (DesiredBPP == 0 || DesiredBPP == 20)) + return 20; + else if (DecimalBPP >= 16 && (DesiredBPP == 0 || DesiredBPP == 16)) + return 16; + else + return BPP_INVALID; + } else { + if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36)) + return 36; + else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30)) + return 30; + else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24)) + return 24; + else + return BPP_INVALID; + } + } +} + +void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) +{ + struct vba_vars_st *locals = &mode_lib->vba; + + int i; + unsigned int j, k, m; + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + + mode_lib->vba.ScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.ScalerEnabled[k] == false + && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) + || mode_lib->vba.HRatio[k] != 1.0 + || mode_lib->vba.htaps[k] != 1.0 + || mode_lib->vba.VRatio[k] != 1.0 + || mode_lib->vba.vtaps[k] != 1.0)) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 + || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0 + || (mode_lib->vba.htaps[k] > 1.0 + && (mode_lib->vba.htaps[k] % 2) == 1) + || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio + || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio + || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k] + || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k] + || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8 + && (mode_lib->vba.HRatio[k] / 2.0 + > mode_lib->vba.HTAPsChroma[k] + || mode_lib->vba.VRatio[k] / 2.0 + > mode_lib->vba.VTAPsChroma[k]))) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } + } + /*Source Format, Pixel Format and Scan Support Check*/ + + mode_lib->vba.SourceFormatPixelAndScanSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + && mode_lib->vba.SourceScan[k] != dm_horz) + || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x) + && mode_lib->vba.SourcePixelFormat[k] != dm_444_64) + || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x + && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_10)) + || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_gfx7_2d_thin_lvp) + && !((mode_lib->vba.SourcePixelFormat[k] + == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] + == dm_444_32) + && mode_lib->vba.SourceScan[k] + == dm_horz + && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp + == true + && mode_lib->vba.DCCEnable[k] + == false)) + || (mode_lib->vba.DCCEnable[k] == true + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_linear + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_10)))) { + mode_lib->vba.SourceFormatPixelAndScanSupport = false; + } + } + /*Bandwidth Support Check*/ + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + locals->BytePerPixelInDETY[k] = 8.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + locals->BytePerPixelInDETY[k] = 4.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { + locals->BytePerPixelInDETY[k] = 2.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { + locals->BytePerPixelInDETY[k] = 1.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + locals->BytePerPixelInDETY[k] = 1.0; + locals->BytePerPixelInDETC[k] = 2.0; + } else { + locals->BytePerPixelInDETY[k] = 4.0 / 3; + locals->BytePerPixelInDETC[k] = 8.0 / 3; + } + if (mode_lib->vba.SourceScan[k] == dm_horz) { + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k]; + } else { + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k]; + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; + locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0; + locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k]; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 4.0; + } else if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 3.0; + } else if (mode_lib->vba.WritebackEnable[k] == true) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 1.5; + } else { + locals->WriteBandwidth[k] = 0.0; + } + } + mode_lib->vba.DCCEnabledInAnyPlane = false; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.DCCEnable[k] == true) { + mode_lib->vba.DCCEnabledInAnyPlane = true; + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->IdealSDPPortBandwidthPerState[i] = dml_min3( + mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], + mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels + * mode_lib->vba.DRAMChannelWidth, + mode_lib->vba.FabricClockPerState[i] + * mode_lib->vba.FabricDatapathToDCNDataReturn); + if (mode_lib->vba.HostVMEnable == false) { + locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i] + * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0; + } else { + locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i] + * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0; + } + } + /*Writeback Latency support check*/ + + mode_lib->vba.WritebackLatencySupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + if (locals->WriteBandwidth[k] + > (mode_lib->vba.WritebackInterfaceLumaBufferSize + + mode_lib->vba.WritebackInterfaceChromaBufferSize) + / mode_lib->vba.WritebackLatency) { + mode_lib->vba.WritebackLatencySupport = false; + } + } else { + if (locals->WriteBandwidth[k] + > 1.5 + * dml_min( + mode_lib->vba.WritebackInterfaceLumaBufferSize, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize) + / mode_lib->vba.WritebackLatency) { + mode_lib->vba.WritebackLatencySupport = false; + } + } + } + } + /*Re-ordering Buffer Support Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = + (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] + + dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, + mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly) + * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { + locals->ROBSupport[i] = true; + } else { + locals->ROBSupport[i] = false; + } + } + /*Writeback Mode Support Check*/ + + mode_lib->vba.TotalNumberOfActiveWriteback = 0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0) + mode_lib->vba.ActiveWritebacksPerPlane[k] = 1; + mode_lib->vba.TotalNumberOfActiveWriteback = + mode_lib->vba.TotalNumberOfActiveWriteback + + mode_lib->vba.ActiveWritebacksPerPlane[k]; + } + } + mode_lib->vba.WritebackModeSupport = true; + if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) { + mode_lib->vba.WritebackModeSupport = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.Writeback10bpc420Supported != true + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + mode_lib->vba.WritebackModeSupport = false; + } + } + /*Writeback Scale Ratio and Taps Support Check*/ + + mode_lib->vba.WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false + && (mode_lib->vba.WritebackHRatio[k] != 1.0 + || mode_lib->vba.WritebackVRatio[k] != 1.0)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio + || mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackMaxVSCLRatio + || mode_lib->vba.WritebackHRatio[k] + < mode_lib->vba.WritebackMinHSCLRatio + || mode_lib->vba.WritebackVRatio[k] + < mode_lib->vba.WritebackMinVSCLRatio + || mode_lib->vba.WritebackLumaHTaps[k] + > mode_lib->vba.WritebackMaxHSCLTaps + || mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackMaxVSCLTaps + || mode_lib->vba.WritebackHRatio[k] + > mode_lib->vba.WritebackLumaHTaps[k] + || mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackLumaVTaps[k] + || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0 + && ((mode_lib->vba.WritebackLumaHTaps[k] % 2) + == 1)) + || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32 + && (mode_lib->vba.WritebackChromaHTaps[k] + > mode_lib->vba.WritebackMaxHSCLTaps + || mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackMaxVSCLTaps + || 2.0 + * mode_lib->vba.WritebackHRatio[k] + > mode_lib->vba.WritebackChromaHTaps[k] + || 2.0 + * mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackChromaVTaps[k] + || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0 + && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (mode_lib->vba.WritebackVRatio[k] < 1.0) { + mode_lib->vba.WritebackLumaVExtra = + dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0); + } else { + mode_lib->vba.WritebackLumaVExtra = -1; + } + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32 + && mode_lib->vba.WritebackLumaVTaps[k] + > (mode_lib->vba.WritebackLineBufferLumaBufferSize + + mode_lib->vba.WritebackLineBufferChromaBufferSize) + / 3.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 + && mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackLineBufferLumaBufferSize + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 + && mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackLineBufferLumaBufferSize + * 8.0 / 10.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) { + mode_lib->vba.WritebackChromaVExtra = 0.0; + } else { + mode_lib->vba.WritebackChromaVExtra = -1; + } + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 + && mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackLineBufferChromaBufferSize + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackChromaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 + && mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackLineBufferChromaBufferSize + * 8.0 / 10.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackChromaVExtra)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + } + } + /*Maximum DISPCLK/DPPCLK Support check*/ + + mode_lib->vba.WritebackRequiredDISPCLK = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + mode_lib->vba.WritebackRequiredDISPCLK = + dml_max( + mode_lib->vba.WritebackRequiredDISPCLK, + CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.WritebackChromaLineBufferWidth)); + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.HRatio[k] > 1.0) { + locals->PSCL_FACTOR[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / dml_ceil( + mode_lib->vba.htaps[k] + / 6.0, + 1.0)); + } else { + locals->PSCL_FACTOR[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + if (locals->BytePerPixelInDETC[k] == 0.0) { + locals->PSCL_FACTOR_CHROMA[k] = 0.0; + locals->MinDPPCLKUsingSingleDPP[k] = + mode_lib->vba.PixelClock[k] + * dml_max3( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / locals->PSCL_FACTOR[k], + 1.0); + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0) + && locals->MinDPPCLKUsingSingleDPP[k] + < 2.0 * mode_lib->vba.PixelClock[k]) { + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 + * mode_lib->vba.PixelClock[k]; + } + } else { + if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) { + locals->PSCL_FACTOR_CHROMA[k] = + dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / 2.0 + / dml_ceil( + mode_lib->vba.HTAPsChroma[k] + / 6.0, + 1.0)); + } else { + locals->PSCL_FACTOR_CHROMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + locals->MinDPPCLKUsingSingleDPP[k] = + mode_lib->vba.PixelClock[k] + * dml_max5( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / locals->PSCL_FACTOR[k], + mode_lib->vba.VTAPsChroma[k] + / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k] + / 2.0), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / 4.0 + / locals->PSCL_FACTOR_CHROMA[k], + 1.0); + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0 + || mode_lib->vba.HTAPsChroma[k] > 6.0 + || mode_lib->vba.VTAPsChroma[k] > 6.0) + && locals->MinDPPCLKUsingSingleDPP[k] + < 2.0 * mode_lib->vba.PixelClock[k]) { + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 + * mode_lib->vba.PixelClock[k]; + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + Calculate256BBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), + dml_ceil(locals->BytePerPixelInDETC[k], 2.0), + &locals->Read256BlockHeightY[k], + &locals->Read256BlockHeightC[k], + &locals->Read256BlockWidthY[k], + &locals->Read256BlockWidthC[k]); + if (mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k]; + locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k]; + } else { + locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k]; + locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k]; + } + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_t + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s_x) + && mode_lib->vba.SourceScan[k] == dm_horz)) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + } else { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] + / 2.0; + } + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 + && mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] + / 2.0; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + && mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k] + / 2.0; + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + } else { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } + } + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + mode_lib->vba.MaximumSwathWidthSupport = 8192.0; + } else { + mode_lib->vba.MaximumSwathWidthSupport = 5120.0; + } + mode_lib->vba.MaximumSwathWidthInDETBuffer = + dml_min( + mode_lib->vba.MaximumSwathWidthSupport, + mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0 + / (locals->BytePerPixelInDETY[k] + * locals->MinSwathHeightY[k] + + locals->BytePerPixelInDETC[k] + / 2.0 + * locals->MinSwathHeightC[k])); + if (locals->BytePerPixelInDETC[k] == 0.0) { + mode_lib->vba.MaximumSwathWidthInLineBuffer = + mode_lib->vba.LineBufferSize + * dml_max(mode_lib->vba.HRatio[k], 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.vtaps[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k], + 1.0) + - 2, + 0.0)); + } else { + mode_lib->vba.MaximumSwathWidthInLineBuffer = + dml_min( + mode_lib->vba.LineBufferSize + * dml_max( + mode_lib->vba.HRatio[k], + 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.vtaps[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k], + 1.0) + - 2, + 0.0)), + 2.0 * mode_lib->vba.LineBufferSize + * dml_max( + mode_lib->vba.HRatio[k] + / 2.0, + 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.VTAPsChroma[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k] + / 2.0, + 1.0) + - 2, + 0.0))); + } + locals->MaximumSwathWidth[k] = dml_min( + mode_lib->vba.MaximumSwathWidthInDETBuffer, + mode_lib->vba.MaximumSwathWidthInLineBuffer); + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.MaxDispclk[i], + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.MaxDppclk[i], + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + locals->RequiredDISPCLK[i][j] = 0.0; + locals->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = + mode_lib->vba.PixelClock[k] + * (1.0 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100.0) + * (1.0 + + mode_lib->vba.DISPCLKRampingMargin + / 100.0); + if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i] + && i == mode_lib->vba.soc.num_states) + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i] + && i == mode_lib->vba.soc.num_states) + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { + locals->ODMCombineEnablePerState[i][k] = false; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; + } else { + locals->ODMCombineEnablePerState[i][k] = true; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } + if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity + && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] + && locals->ODMCombineEnablePerState[i][k] == false) { + locals->NoOfDPP[i][j][k] = 1; + locals->RequiredDPPCLK[i][j][k] = + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } else { + locals->NoOfDPP[i][j][k] = 2; + locals->RequiredDPPCLK[i][j][k] = + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.PlaneRequiredDISPCLK); + if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity) + || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) { + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + locals->TotalNumberOfActiveDPP[i][j] = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + if (j == 1) { + while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP + && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) { + double BWOfNonSplitPlaneOfMaximumBandwidth; + unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; + + BWOfNonSplitPlaneOfMaximumBandwidth = 0; + NumberOfNonSplitPlaneOfMaximumBandwidth = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) { + BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k]; + NumberOfNonSplitPlaneOfMaximumBandwidth = k; + } + } + locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; + locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = + locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1; + } + } + if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) { + locals->RequiredDISPCLK[i][j] = 0.0; + locals->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->ODMCombineEnablePerState[i][k] = false; + if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) { + locals->NoOfDPP[i][j][k] = 1; + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } else { + locals->NoOfDPP[i][j][k] = 2; + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } + if (i != mode_lib->vba.soc.num_states) { + mode_lib->vba.PlaneRequiredDISPCLK = + mode_lib->vba.PixelClock[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + } else { + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.PlaneRequiredDISPCLK); + if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity + || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + locals->TotalNumberOfActiveDPP[i][j] = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.WritebackRequiredDISPCLK); + if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity + < mode_lib->vba.WritebackRequiredDISPCLK) { + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + } + /*Viewport Size Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->ViewportSizeSupport[i] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->ODMCombineEnablePerState[i][k] == true) { + if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) + > locals->MaximumSwathWidth[k]) { + locals->ViewportSizeSupport[i] = false; + } + } else { + if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { + locals->ViewportSizeSupport[i] = false; + } + } + } + } + /*Total Available Pipes Support Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP) + locals->TotalAvailablePipesSupport[i][j] = true; + else + locals->TotalAvailablePipesSupport[i][j] = false; + } + } + /*Total Available OTG Support Check*/ + + mode_lib->vba.TotalNumberOfActiveOTG = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + + 1.0; + } + } + if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) { + mode_lib->vba.NumberOfOTGSupport = true; + } else { + mode_lib->vba.NumberOfOTGSupport = false; + } + /*Display IO and DSC Support Check*/ + + mode_lib->vba.NonsupportedDSCInputBPC = false; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) { + mode_lib->vba.NonsupportedDSCInputBPC = true; + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->RequiresDSC[i][k] = 0; + locals->RequiresFEC[i][k] = 0; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.Output[k] == dm_hdmi) { + locals->RequiresDSC[i][k] = 0; + locals->RequiresFEC[i][k] = 0; + locals->OutputBppPerState[i][k] = TruncToValidBPP( + dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24, + mode_lib->vba.ForcedOutputLinkBPP[k], + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + } else if (mode_lib->vba.Output[k] == dm_dp + || mode_lib->vba.Output[k] == dm_edp) { + if (mode_lib->vba.Output[k] == dm_edp) { + mode_lib->vba.EffectiveFECOverhead = 0.0; + } else { + mode_lib->vba.EffectiveFECOverhead = + mode_lib->vba.FECOverhead; + } + if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; + } + if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; + } + if (mode_lib->vba.Outbpp == BPP_INVALID + && mode_lib->vba.PHYCLKPerState[i] + >= 810.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + mode_lib->vba.ForcedOutputLinkBPP[k], + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = + mode_lib->vba.Outbpp; + } + } + } else { + locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE; + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->DIOSupport[i] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->OutputBppPerState[i][k] == BPP_INVALID + || (mode_lib->vba.OutputFormat[k] == dm_420 + && mode_lib->vba.Interlace[k] == true + && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)) { + locals->DIOSupport[i] = false; + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->DSCCLKRequiredMoreThanSupported[i] = false; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if ((mode_lib->vba.Output[k] == dm_dp + || mode_lib->vba.Output[k] == dm_edp)) { + if (mode_lib->vba.OutputFormat[k] == dm_420 + || mode_lib->vba.OutputFormat[k] + == dm_n422) { + mode_lib->vba.DSCFormatFactor = 2; + } else { + mode_lib->vba.DSCFormatFactor = 1; + } + if (locals->RequiresDSC[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] + == true) { + if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { + locals->DSCCLKRequiredMoreThanSupported[i] = + true; + } + } else { + if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { + locals->DSCCLKRequiredMoreThanSupported[i] = + true; + } + } + } + } + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->NotEnoughDSCUnits[i] = false; + mode_lib->vba.TotalDSCUnitsRequired = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->RequiresDSC[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == true) { + mode_lib->vba.TotalDSCUnitsRequired = + mode_lib->vba.TotalDSCUnitsRequired + 2.0; + } else { + mode_lib->vba.TotalDSCUnitsRequired = + mode_lib->vba.TotalDSCUnitsRequired + 1.0; + } + } + } + if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) { + locals->NotEnoughDSCUnits[i] = true; + } + } + /*DSC Delay per state*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] != k) { + mode_lib->vba.slices = 0; + } else if (locals->RequiresDSC[i][k] == 0 + || locals->RequiresDSC[i][k] == false) { + mode_lib->vba.slices = 0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) { + mode_lib->vba.slices = dml_ceil( + mode_lib->vba.PixelClockBackEnd[k] / 400.0, + 4.0); + } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) { + mode_lib->vba.slices = 8.0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) { + mode_lib->vba.slices = 4.0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) { + mode_lib->vba.slices = 2.0; + } else { + mode_lib->vba.slices = 1.0; + } + if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE + || locals->OutputBppPerState[i][k] == BPP_INVALID) { + mode_lib->vba.bpp = 0.0; + } else { + mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; + } + if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { + if (locals->ODMCombineEnablePerState[i][k] == false) { + locals->DSCDelayPerState[i][k] = + dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.bpp, + dml_ceil( + mode_lib->vba.HActive[k] + / mode_lib->vba.slices, + 1.0), + mode_lib->vba.slices, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k]); + } else { + locals->DSCDelayPerState[i][k] = + 2.0 * (dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.bpp, + dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0), + mode_lib->vba.slices / 2, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay(mode_lib->vba.OutputFormat[k])); + } + locals->DSCDelayPerState[i][k] = + locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k]; + } else { + locals->DSCDelayPerState[i][k] = 0.0; + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) { + if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true) + locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m]; + } + } + } + } + + //Prefetch Check + for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) { + for (j = 0; j <= 1; ++j) { + locals->TotalNumberOfDCCActiveDPP[i][j] = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.DCCEnable[k] == true) + locals->TotalNumberOfDCCActiveDPP[i][j] = locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + } + } + } + + mode_lib->vba.UrgentLatency = dml_max3( + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.UrgentLatencyPixelMixedWithVMData, + mode_lib->vba.UrgentLatencyVMDataOnly); + mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode( + mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, + &mode_lib->vba.MinPrefetchMode, + &mode_lib->vba.MaxPrefetchMode); + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k]; + locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k]; + if (locals->ODMCombineEnablePerState[i][k] == true) { + locals->SwathWidthYThisState[k] = + dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])); + } else { + locals->SwathWidthYThisState[k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; + } + mode_lib->vba.SwathWidthGranularityY = 256.0 + / dml_ceil(locals->BytePerPixelInDETY[k], 1.0) + / locals->MaxSwathHeightY[k]; + mode_lib->vba.RoundedUpMaxSwathSizeBytesY = + (dml_ceil(locals->SwathWidthYThisState[k] - 1.0, mode_lib->vba.SwathWidthGranularityY) + + mode_lib->vba.SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k]; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + mode_lib->vba.RoundedUpMaxSwathSizeBytesY = dml_ceil( + mode_lib->vba.RoundedUpMaxSwathSizeBytesY, + 256.0) + 256; + } + if (locals->MaxSwathHeightC[k] > 0.0) { + mode_lib->vba.SwathWidthGranularityC = 256.0 / dml_ceil(locals->BytePerPixelInDETC[k], 2.0) / locals->MaxSwathHeightC[k]; + mode_lib->vba.RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYThisState[k] / 2.0 - 1.0, mode_lib->vba.SwathWidthGranularityC) + + mode_lib->vba.SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k]; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + mode_lib->vba.RoundedUpMaxSwathSizeBytesC = dml_ceil(mode_lib->vba.RoundedUpMaxSwathSizeBytesC, 256.0) + 256; + } + } else { + mode_lib->vba.RoundedUpMaxSwathSizeBytesC = 0.0; + } + if (mode_lib->vba.RoundedUpMaxSwathSizeBytesY + mode_lib->vba.RoundedUpMaxSwathSizeBytesC + <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) { + locals->SwathHeightYThisState[k] = locals->MaxSwathHeightY[k]; + locals->SwathHeightCThisState[k] = locals->MaxSwathHeightC[k]; + } else { + locals->SwathHeightYThisState[k] = + locals->MinSwathHeightY[k]; + locals->SwathHeightCThisState[k] = + locals->MinSwathHeightC[k]; + } + } + + CalculateDCFCLKDeepSleep( + mode_lib, + mode_lib->vba.NumberOfActivePlanes, + locals->BytePerPixelInDETY, + locals->BytePerPixelInDETC, + mode_lib->vba.VRatio, + locals->SwathWidthYThisState, + locals->NoOfDPPThisState, + mode_lib->vba.HRatio, + mode_lib->vba.PixelClock, + locals->PSCL_FACTOR, + locals->PSCL_FACTOR_CHROMA, + locals->RequiredDPPCLKThisState, + &mode_lib->vba.ProjectedDCFCLKDeepSleep); + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + locals->Read256BlockHeightC[k], + locals->Read256BlockWidthC[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(locals->BytePerPixelInDETC[k], 2.0), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k] / 2.0, + mode_lib->vba.ViewportHeight[k] / 2.0, + locals->SwathWidthYThisState[k] / 2.0, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMEnable, + mode_lib->vba.HostVMMaxPageTableLevels, + mode_lib->vba.HostVMCachedPageTableLevels, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsChroma, + mode_lib->vba.PitchC[k], + 0.0, + &locals->MacroTileWidthC[k], + &mode_lib->vba.MetaRowBytesC, + &mode_lib->vba.DPTEBytesPerRowC, + &locals->PTEBufferSizeNotExceededC[i][j][k], + locals->dpte_row_width_chroma_ub, + &locals->dpte_row_height_chroma[k], + &locals->meta_req_width_chroma[k], + &locals->meta_req_height_chroma[k], + &locals->meta_row_width_chroma[k], + &locals->meta_row_height_chroma[k], + &locals->vm_group_bytes_chroma, + &locals->dpte_group_bytes_chroma, + locals->PixelPTEReqWidthC, + locals->PixelPTEReqHeightC, + locals->PTERequestSizeC, + locals->dpde0_bytes_per_frame_ub_c, + locals->meta_pte_bytes_per_frame_ub_c); + locals->PrefetchLinesC[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k]/2, + mode_lib->vba.VTAPsChroma[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + locals->SwathHeightCThisState[k], + mode_lib->vba.ViewportYStartC[k], + &locals->PrefillC[k], + &locals->MaxNumSwC[k]); + locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma; + } else { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; + mode_lib->vba.MetaRowBytesC = 0.0; + mode_lib->vba.DPTEBytesPerRowC = 0.0; + locals->PrefetchLinesC[k] = 0.0; + locals->PTEBufferSizeNotExceededC[i][j][k] = true; + locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; + } + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + locals->Read256BlockHeightY[k], + locals->Read256BlockWidthY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k], + mode_lib->vba.ViewportHeight[k], + locals->SwathWidthYThisState[k], + mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMEnable, + mode_lib->vba.HostVMMaxPageTableLevels, + mode_lib->vba.HostVMCachedPageTableLevels, + mode_lib->vba.VMMPageSize, + locals->PTEBufferSizeInRequestsForLuma, + mode_lib->vba.PitchY[k], + mode_lib->vba.DCCMetaPitchY[k], + &locals->MacroTileWidthY[k], + &mode_lib->vba.MetaRowBytesY, + &mode_lib->vba.DPTEBytesPerRowY, + &locals->PTEBufferSizeNotExceededY[i][j][k], + locals->dpte_row_width_luma_ub, + &locals->dpte_row_height[k], + &locals->meta_req_width[k], + &locals->meta_req_height[k], + &locals->meta_row_width[k], + &locals->meta_row_height[k], + &locals->vm_group_bytes[k], + &locals->dpte_group_bytes[k], + locals->PixelPTEReqWidthY, + locals->PixelPTEReqHeightY, + locals->PTERequestSizeY, + locals->dpde0_bytes_per_frame_ub_l, + locals->meta_pte_bytes_per_frame_ub_l); + locals->PrefetchLinesY[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.vtaps[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + locals->SwathHeightYThisState[k], + mode_lib->vba.ViewportYStartY[k], + &locals->PrefillY[k], + &locals->MaxNumSwY[k]); + locals->PDEAndMetaPTEBytesPerFrame[k] = + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; + locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + + CalculateActiveRowBandwidth( + mode_lib->vba.GPUVMEnable, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.HTotal[k] / + mode_lib->vba.PixelClock[k], + mode_lib->vba.MetaRowBytesY, + mode_lib->vba.MetaRowBytesC, + locals->meta_row_height[k], + locals->meta_row_height_chroma[k], + mode_lib->vba.DPTEBytesPerRowY, + mode_lib->vba.DPTEBytesPerRowC, + locals->dpte_row_height[k], + locals->dpte_row_height_chroma[k], + &locals->meta_row_bw[k], + &locals->dpte_row_bw[k]); + } + mode_lib->vba.ExtraLatency = CalculateExtraLatency( + locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i], + locals->TotalNumberOfActiveDPP[i][j], + mode_lib->vba.PixelChunkSizeInKByte, + locals->TotalNumberOfDCCActiveDPP[i][j], + mode_lib->vba.MetaChunkSize, + locals->ReturnBWPerState[i], + mode_lib->vba.GPUVMEnable, + mode_lib->vba.HostVMEnable, + mode_lib->vba.NumberOfActivePlanes, + locals->NoOfDPPThisState, + locals->dpte_group_bytes, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + mode_lib->vba.HostVMMaxPageTableLevels, + mode_lib->vba.HostVMCachedPageTableLevels); + + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j]; + } else { + locals->WritebackDelay[i][k] = 0.0; + } + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[m] == k + && mode_lib->vba.WritebackEnable[m] + == true) { + locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k], + mode_lib->vba.WritebackLatency + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[m], + mode_lib->vba.WritebackHRatio[m], + mode_lib->vba.WritebackVRatio[m], + mode_lib->vba.WritebackLumaHTaps[m], + mode_lib->vba.WritebackLumaVTaps[m], + mode_lib->vba.WritebackChromaHTaps[m], + mode_lib->vba.WritebackChromaVTaps[m], + mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]); + } + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[k] == m) { + locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m]; + } + } + } + mode_lib->vba.MaxMaxVStartup = 0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); + mode_lib->vba.MaxMaxVStartup = dml_max(mode_lib->vba.MaxMaxVStartup, locals->MaximumVStartup[k]); + } + + mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode; + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; + do { + mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode; + mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; + + mode_lib->vba.TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[i][j], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + Pipe myPipe; + HostVM myHostVM; + + if (mode_lib->vba.XFCEnabled[k] == true) { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = + CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + locals->SwathWidthYThisState[k], + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TimeCalc, + mode_lib->vba.TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + } else { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; + } + + myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k]; + myPipe.DISPCLK = locals->RequiredDISPCLK[i][j]; + myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep; + myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k]; + myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + myPipe.SourceScan = mode_lib->vba.SourceScan[k]; + myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k]; + myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k]; + myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k]; + myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k]; + myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + myPipe.HTotal = mode_lib->vba.HTotal[k]; + + + myHostVM.Enable = mode_lib->vba.HostVMEnable; + myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels; + myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels; + + + mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule( + mode_lib, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + &myPipe, + locals->DSCDelayPerState[i][k], + mode_lib->vba.DPPCLKDelaySubtotal, + mode_lib->vba.DPPCLKDelaySCL, + mode_lib->vba.DPPCLKDelaySCLLBOnly, + mode_lib->vba.DPPCLKDelayCNVCFormater, + mode_lib->vba.DPPCLKDelayCNVCCursor, + mode_lib->vba.DISPCLKDelaySubtotal, + locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.MaxInterDCNTileRepeaters, + dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[k]), + locals->MaximumVStartup[k], + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + &myHostVM, + mode_lib->vba.DynamicMetadataEnable[k], + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], + mode_lib->vba.DynamicMetadataTransmittedBytes[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.UrgentLatency, + mode_lib->vba.ExtraLatency, + mode_lib->vba.TimeCalc, + locals->PDEAndMetaPTEBytesPerFrame[k], + locals->MetaRowBytes[k], + locals->DPTEBytesPerRow[k], + locals->PrefetchLinesY[k], + locals->SwathWidthYThisState[k], + locals->BytePerPixelInDETY[k], + locals->PrefillY[k], + locals->MaxNumSwY[k], + locals->PrefetchLinesC[k], + locals->BytePerPixelInDETC[k], + locals->PrefillC[k], + locals->MaxNumSwC[k], + locals->SwathHeightYThisState[k], + locals->SwathHeightCThisState[k], + mode_lib->vba.TWait, + mode_lib->vba.XFCEnabled[k], + mode_lib->vba.XFCRemoteSurfaceFlipDelay, + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + &locals->dst_x_after_scaler, + &locals->dst_y_after_scaler, + &locals->LineTimesForPrefetch[k], + &locals->PrefetchBW[k], + &locals->LinesForMetaPTE[k], + &locals->LinesForMetaAndDPTERow[k], + &locals->VRatioPreY[i][j][k], + &locals->VRatioPreC[i][j][k], + &locals->RequiredPrefetchPixelDataBWLuma[i][j][k], + &locals->RequiredPrefetchPixelDataBWChroma[i][j][k], + &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, + &locals->Tno_bw[k], + &locals->prefetch_vmrow_bw[k], + locals->swath_width_luma_ub, + locals->swath_width_chroma_ub, + &mode_lib->vba.VUpdateOffsetPix[k], + &mode_lib->vba.VUpdateWidthPix[k], + &mode_lib->vba.VReadyOffsetPix[k]); + } + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0; + mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + uint m; + + locals->cursor_bw[k] = 0; + locals->cursor_bw_pre[k] = 0; + for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) { + locals->cursor_bw[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] + / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; + locals->cursor_bw_pre[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] + / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPreY[i][j][k]; + } + + CalculateUrgentBurstFactor( + mode_lib->vba.DETBufferSizeInKByte, + locals->SwathHeightYThisState[k], + locals->SwathHeightCThisState[k], + locals->SwathWidthYThisState[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.UrgentLatency, + mode_lib->vba.CursorBufferSize, + mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1], + dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]), + mode_lib->vba.VRatio[k], + locals->VRatioPreY[i][j][k], + locals->VRatioPreC[i][j][k], + locals->BytePerPixelInDETY[k], + locals->BytePerPixelInDETC[k], + &locals->UrgentBurstFactorCursor[k], + &locals->UrgentBurstFactorCursorPre[k], + &locals->UrgentBurstFactorLuma[k], + &locals->UrgentBurstFactorLumaPre[k], + &locals->UrgentBurstFactorChroma[k], + &locals->UrgentBurstFactorChromaPre[k], + &locals->NotEnoughUrgentLatencyHiding, + &locals->NotEnoughUrgentLatencyHidingPre); + + if (mode_lib->vba.UseUrgentBurstBandwidth == false) { + locals->UrgentBurstFactorCursor[k] = 1; + locals->UrgentBurstFactorCursorPre[k] = 1; + locals->UrgentBurstFactorLuma[k] = 1; + locals->UrgentBurstFactorLumaPre[k] = 1; + locals->UrgentBurstFactorChroma[k] = 1; + locals->UrgentBurstFactorChromaPre[k] = 1; + } + + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithoutPrefetch + + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k] + locals->ReadBandwidthLuma[k] + * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k] + * locals->UrgentBurstFactorChroma[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k]; + mode_lib->vba.MaximumReadBandwidthWithPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch + + dml_max3(locals->prefetch_vmrow_bw[k], + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k] + * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k] + + locals->meta_row_bw[k] + locals->dpte_row_bw[k], + locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k] + + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k] + + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); + } + locals->BandwidthWithoutPrefetchSupported[i] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i] + || locals->NotEnoughUrgentLatencyHiding == 1) { + locals->BandwidthWithoutPrefetchSupported[i] = false; + } + + locals->PrefetchSupported[i][j] = true; + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i] + || locals->NotEnoughUrgentLatencyHiding == 1 + || locals->NotEnoughUrgentLatencyHidingPre == 1) { + locals->PrefetchSupported[i][j] = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->LineTimesForPrefetch[k] < 2.0 + || locals->LinesForMetaPTE[k] >= 32.0 + || locals->LinesForMetaAndDPTERow[k] >= 16.0 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { + locals->PrefetchSupported[i][j] = false; + } + } + locals->VRatioInPrefetchSupported[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->VRatioPreY[i][j][k] > 4.0 + || locals->VRatioPreC[i][j][k] > 4.0 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { + locals->VRatioInPrefetchSupported[i][j] = false; + } + } + mode_lib->vba.AnyLinesForVMOrRowTooLarge = false; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (locals->LinesForMetaAndDPTERow[k] >= 16 || locals->LinesForMetaPTE[k] >= 32) { + mode_lib->vba.AnyLinesForVMOrRowTooLarge = true; + } + } + + if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; + mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1; + } else { + mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; + } + } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) + && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup + || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode)); + + if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip + - dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] + + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] + + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], + locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k] + + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k] + + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); + } + mode_lib->vba.TotImmediateFlipBytes = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes + + locals->PDEAndMetaPTEBytesPerFrame[k] + locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]; + } + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + CalculateFlipSchedule( + mode_lib, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + mode_lib->vba.ExtraLatency, + mode_lib->vba.UrgentLatency, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.HostVMEnable, + mode_lib->vba.HostVMMaxPageTableLevels, + mode_lib->vba.HostVMCachedPageTableLevels, + mode_lib->vba.GPUVMEnable, + locals->PDEAndMetaPTEBytesPerFrame[k], + locals->MetaRowBytes[k], + locals->DPTEBytesPerRow[k], + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.VRatio[k], + locals->Tno_bw[k], + mode_lib->vba.DCCEnable[k], + locals->dpte_row_height[k], + locals->meta_row_height[k], + locals->dpte_row_height_chroma[k], + locals->meta_row_height_chroma[k], + &locals->DestinationLinesToRequestVMInImmediateFlip[k], + &locals->DestinationLinesToRequestRowInImmediateFlip[k], + &locals->final_flip_bw[k], + &locals->ImmediateFlipSupportedForPipe[k]); + } + mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.total_dcn_read_bw_with_flip = mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3( + locals->prefetch_vmrow_bw[k], + locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] + + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] + + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], + locals->final_flip_bw[k] + locals->RequiredPrefetchPixelDataBWLuma[i][j][k] + * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] + * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] + * locals->UrgentBurstFactorCursorPre[k]); + } + locals->ImmediateFlipSupportedForState[i][j] = true; + if (mode_lib->vba.total_dcn_read_bw_with_flip + > locals->ReturnBWPerState[i]) { + locals->ImmediateFlipSupportedForState[i][j] = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->ImmediateFlipSupportedForPipe[k] == false) { + locals->ImmediateFlipSupportedForState[i][j] = false; + } + } + } else { + locals->ImmediateFlipSupportedForState[i][j] = false; + } + mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3( + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, + mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); + CalculateWatermarksAndDRAMSpeedChangeSupport( + mode_lib, + mode_lib->vba.PrefetchMode[i][j], + mode_lib->vba.NumberOfActivePlanes, + mode_lib->vba.MaxLineBufferLines, + mode_lib->vba.LineBufferSize, + mode_lib->vba.DPPOutputBufferPixels, + mode_lib->vba.DETBufferSizeInKByte, + mode_lib->vba.WritebackInterfaceLumaBufferSize, + mode_lib->vba.WritebackInterfaceChromaBufferSize, + mode_lib->vba.DCFCLKPerState[i], + mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels, + locals->ReturnBWPerState[i], + mode_lib->vba.GPUVMEnable, + locals->dpte_group_bytes, + mode_lib->vba.MetaChunkSize, + mode_lib->vba.UrgentLatency, + mode_lib->vba.ExtraLatency, + mode_lib->vba.WritebackLatency, + mode_lib->vba.WritebackChunkSize, + mode_lib->vba.SOCCLKPerState[i], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.SRExitTime, + mode_lib->vba.SREnterPlusExitTime, + mode_lib->vba.ProjectedDCFCLKDeepSleep, + locals->NoOfDPPThisState, + mode_lib->vba.DCCEnable, + locals->RequiredDPPCLKThisState, + locals->SwathWidthYSingleDPP, + locals->SwathHeightYThisState, + locals->ReadBandwidthLuma, + locals->SwathHeightCThisState, + locals->ReadBandwidthChroma, + mode_lib->vba.LBBitPerPixel, + locals->SwathWidthYThisState, + mode_lib->vba.HRatio, + mode_lib->vba.vtaps, + mode_lib->vba.VTAPsChroma, + mode_lib->vba.VRatio, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.BlendingAndTiming, + locals->BytePerPixelInDETY, + locals->BytePerPixelInDETC, + mode_lib->vba.WritebackEnable, + mode_lib->vba.WritebackPixelFormat, + mode_lib->vba.WritebackDestinationWidth, + mode_lib->vba.WritebackDestinationHeight, + mode_lib->vba.WritebackSourceHeight, + &locals->DRAMClockChangeSupport[i][j], + &mode_lib->vba.UrgentWatermark, + &mode_lib->vba.WritebackUrgentWatermark, + &mode_lib->vba.DRAMClockChangeWatermark, + &mode_lib->vba.WritebackDRAMClockChangeWatermark, + &mode_lib->vba.StutterExitWatermark, + &mode_lib->vba.StutterEnterPlusExitWatermark, + &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported); + } + } + + /*Vertical Active BW support*/ + { + double MaxTotalVActiveRDBandwidth = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k]; + } + for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) { + locals->MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min( + locals->IdealSDPPortBandwidthPerState[i] * + mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation + / 100.0, mode_lib->vba.DRAMSpeedPerState[i] * + mode_lib->vba.NumberOfChannels * + mode_lib->vba.DRAMChannelWidth * + mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation + / 100.0); + + if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i]) { + locals->TotalVerticalActiveBandwidthSupport[i] = true; + } else { + locals->TotalVerticalActiveBandwidthSupport[i] = false; + } + } + } + + /*PTE Buffer Size Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->PTEBufferSizeNotExceeded[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->PTEBufferSizeNotExceededY[i][j][k] == false + || locals->PTEBufferSizeNotExceededC[i][j][k] == false) { + locals->PTEBufferSizeNotExceeded[i][j] = false; + } + } + } + } + /*Cursor Support Check*/ + + mode_lib->vba.CursorSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.CursorWidth[k][0] > 0.0) { + for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) { + if (mode_lib->vba.CursorBPP[k][m] == 64 && mode_lib->vba.Cursor64BppSupport == false) { + mode_lib->vba.CursorSupport = false; + } + } + } + } + /*Valid Pitch Check*/ + + mode_lib->vba.PitchSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->AlignedYPitch[k] = dml_ceil( + dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]), + locals->MacroTileWidthY[k]); + if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) { + mode_lib->vba.PitchSupport = false; + } + if (mode_lib->vba.DCCEnable[k] == true) { + locals->AlignedDCCMetaPitch[k] = dml_ceil( + dml_max( + mode_lib->vba.DCCMetaPitchY[k], + mode_lib->vba.ViewportWidth[k]), + 64.0 * locals->Read256BlockWidthY[k]); + } else { + locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k]; + } + if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) { + mode_lib->vba.PitchSupport = false; + } + if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) { + locals->AlignedCPitch[k] = dml_ceil( + dml_max( + mode_lib->vba.PitchC[k], + mode_lib->vba.ViewportWidth[k] / 2.0), + locals->MacroTileWidthC[k]); + } else { + locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k]; + } + if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) { + mode_lib->vba.PitchSupport = false; + } + } + /*Mode Support, Voltage State and SOC Configuration*/ + + for (i = mode_lib->vba.soc.num_states; i >= 0; i--) { + for (j = 0; j < 2; j++) { + enum dm_validation_status status = DML_VALIDATION_OK; + + if (mode_lib->vba.ScaleRatioAndTapsSupport != true) { + status = DML_FAIL_SCALE_RATIO_TAP; + } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { + status = DML_FAIL_SOURCE_PIXEL_FORMAT; + } else if (locals->ViewportSizeSupport[i] != true) { + status = DML_FAIL_VIEWPORT_SIZE; + } else if (locals->DIOSupport[i] != true) { + status = DML_FAIL_DIO_SUPPORT; + } else if (locals->NotEnoughDSCUnits[i] != false) { + status = DML_FAIL_NOT_ENOUGH_DSC; + } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) { + status = DML_FAIL_DSC_CLK_REQUIRED; + } else if (locals->ROBSupport[i] != true) { + status = DML_FAIL_REORDERING_BUFFER; + } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { + status = DML_FAIL_DISPCLK_DPPCLK; + } else if (locals->TotalAvailablePipesSupport[i][j] != true) { + status = DML_FAIL_TOTAL_AVAILABLE_PIPES; + } else if (mode_lib->vba.NumberOfOTGSupport != true) { + status = DML_FAIL_NUM_OTG; + } else if (mode_lib->vba.WritebackModeSupport != true) { + status = DML_FAIL_WRITEBACK_MODE; + } else if (mode_lib->vba.WritebackLatencySupport != true) { + status = DML_FAIL_WRITEBACK_LATENCY; + } else if (mode_lib->vba.WritebackScaleRatioAndTapsSupport != true) { + status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP; + } else if (mode_lib->vba.CursorSupport != true) { + status = DML_FAIL_CURSOR_SUPPORT; + } else if (mode_lib->vba.PitchSupport != true) { + status = DML_FAIL_PITCH_SUPPORT; + } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + status = DML_FAIL_TOTAL_V_ACTIVE_BW; + } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) { + status = DML_FAIL_PTE_BUFFER_SIZE; + } else if (mode_lib->vba.NonsupportedDSCInputBPC != false) { + status = DML_FAIL_DSC_INPUT_BPC; + } else if ((mode_lib->vba.HostVMEnable != false + && locals->ImmediateFlipSupportedForState[i][j] != true)) { + status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP; + } else if (locals->PrefetchSupported[i][j] != true) { + status = DML_FAIL_PREFETCH_SUPPORT; + } else if (locals->VRatioInPrefetchSupported[i][j] != true) { + status = DML_FAIL_V_RATIO_PREFETCH; + } + + if (status == DML_VALIDATION_OK) { + locals->ModeSupport[i][j] = true; + } else { + locals->ModeSupport[i][j] = false; + } + locals->ValidationStatus[i] = status; + } + } + { + unsigned int MaximumMPCCombine = 0; + mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1; + for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) { + if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) { + mode_lib->vba.VoltageLevel = i; + if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false + || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible + || (mode_lib->vba.WhenToDoMPCCombine == dm_mpc_reduce_voltage_and_clocks + && ((locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vactive + && locals->DRAMClockChangeSupport[i][0] != dm_dram_clock_change_vactive) + || (locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vblank + && locals->DRAMClockChangeSupport[i][0] == dm_dram_clock_change_unsupported))))) { + MaximumMPCCombine = 1; + } else { + MaximumMPCCombine = 0; + } + break; + } + } + mode_lib->vba.ImmediateFlipSupport = + locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + } + mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + mode_lib->vba.maxMpcComb = MaximumMPCCombine; + } + mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.ODMCombineEnabled[k] = + locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k]; + } else { + mode_lib->vba.ODMCombineEnabled[k] = 0; + } + mode_lib->vba.DSCEnabled[k] = + locals->RequiresDSC[mode_lib->vba.VoltageLevel][k]; + mode_lib->vba.OutputBpp[k] = + locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k]; + } +} + +static void CalculateWatermarksAndDRAMSpeedChangeSupport( + struct display_mode_lib *mode_lib, + unsigned int PrefetchMode, + unsigned int NumberOfActivePlanes, + unsigned int MaxLineBufferLines, + unsigned int LineBufferSize, + unsigned int DPPOutputBufferPixels, + double DETBufferSizeInKByte, + unsigned int WritebackInterfaceLumaBufferSize, + unsigned int WritebackInterfaceChromaBufferSize, + double DCFCLK, + double UrgentOutOfOrderReturn, + double ReturnBW, + bool GPUVMEnable, + long dpte_group_bytes[], + unsigned int MetaChunkSize, + double UrgentLatency, + double ExtraLatency, + double WritebackLatency, + double WritebackChunkSize, + double SOCCLK, + double DRAMClockChangeLatency, + double SRExitTime, + double SREnterPlusExitTime, + double DCFCLKDeepSleep, + int DPPPerPlane[], + bool DCCEnable[], + double DPPCLK[], + unsigned int SwathWidthSingleDPPY[], + unsigned int SwathHeightY[], + double ReadBandwidthPlaneLuma[], + unsigned int SwathHeightC[], + double ReadBandwidthPlaneChroma[], + unsigned int LBBitPerPixel[], + unsigned int SwathWidthY[], + double HRatio[], + unsigned int vtaps[], + unsigned int VTAPsChroma[], + double VRatio[], + unsigned int HTotal[], + double PixelClock[], + unsigned int BlendingAndTiming[], + double BytePerPixelDETY[], + double BytePerPixelDETC[], + bool WritebackEnable[], + enum source_format_class WritebackPixelFormat[], + double WritebackDestinationWidth[], + double WritebackDestinationHeight[], + double WritebackSourceHeight[], + enum clock_change_support *DRAMClockChangeSupport, + double *UrgentWatermark, + double *WritebackUrgentWatermark, + double *DRAMClockChangeWatermark, + double *WritebackDRAMClockChangeWatermark, + double *StutterExitWatermark, + double *StutterEnterPlusExitWatermark, + double *MinActiveDRAMClockChangeLatencySupported) +{ + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double DPPOutputBufferLinesY; + double DPPOutputBufferLinesC; + double DETBufferSizeY; + double DETBufferSizeC; + double LinesInDETY[DC__NUM_DPP__MAX]; + double LinesInDETC; + unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; + unsigned int LinesInDETCRoundedDownToSwath; + double FullDETBufferingTimeY[DC__NUM_DPP__MAX]; + double FullDETBufferingTimeC; + double ActiveDRAMClockChangeLatencyMarginY; + double ActiveDRAMClockChangeLatencyMarginC; + double WritebackDRAMClockChangeLatencyMargin; + double PlaneWithMinActiveDRAMClockChangeMargin; + double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; + double FullDETBufferingTimeYStutterCriticalPlane = 0; + double TimeToFinishSwathTransferStutterCriticalPlane = 0; + uint k, j; + + mode_lib->vba.TotalActiveDPP = 0; + mode_lib->vba.TotalDCCActiveDPP = 0; + for (k = 0; k < NumberOfActivePlanes; ++k) { + mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k]; + if (DCCEnable[k] == true) { + mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k]; + } + } + + mode_lib->vba.TotalDataReadBandwidth = 0; + for (k = 0; k < NumberOfActivePlanes; ++k) { + mode_lib->vba.TotalDataReadBandwidth = mode_lib->vba.TotalDataReadBandwidth + + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; + } + + *UrgentWatermark = UrgentLatency + ExtraLatency; + + *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark; + + mode_lib->vba.TotalActiveWriteback = 0; + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (WritebackEnable[k] == true) { + mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1; + } + } + + if (mode_lib->vba.TotalActiveWriteback <= 1) { + *WritebackUrgentWatermark = WritebackLatency; + } else { + *WritebackUrgentWatermark = WritebackLatency + + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + } + + if (mode_lib->vba.TotalActiveWriteback <= 1) { + *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency; + } else { + *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + } + + for (k = 0; k < NumberOfActivePlanes; ++k) { + + mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, + dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) + - (vtaps[k] - 1); + + mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, + dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / 2 / dml_max(HRatio[k] / 2, 1.0)), 1)) + - (VTAPsChroma[k] - 1); + + EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] + * (HTotal[k] / PixelClock[k]); + + EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC + / (VRatio[k] / 2) * (HTotal[k] / PixelClock[k]); + + if (SwathWidthY[k] > 2 * DPPOutputBufferPixels) { + DPPOutputBufferLinesY = (double) DPPOutputBufferPixels / SwathWidthY[k]; + } else if (SwathWidthY[k] > DPPOutputBufferPixels) { + DPPOutputBufferLinesY = 0.5; + } else { + DPPOutputBufferLinesY = 1; + } + + if (SwathWidthY[k] / 2.0 > 2 * DPPOutputBufferPixels) { + DPPOutputBufferLinesC = (double) DPPOutputBufferPixels + / (SwathWidthY[k] / 2.0); + } else if (SwathWidthY[k] / 2.0 > DPPOutputBufferPixels) { + DPPOutputBufferLinesC = 0.5; + } else { + DPPOutputBufferLinesC = 1; + } + + CalculateDETBufferSize( + DETBufferSizeInKByte, + SwathHeightY[k], + SwathHeightC[k], + &DETBufferSizeY, + &DETBufferSizeC); + + LinesInDETY[k] = DETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; + LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); + FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] + * (HTotal[k] / PixelClock[k]) / VRatio[k]; + if (BytePerPixelDETC[k] > 0) { + LinesInDETC = DETBufferSizeC / BytePerPixelDETC[k] / (SwathWidthY[k] / 2.0); + LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); + FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath + * (HTotal[k] / PixelClock[k]) / (VRatio[k] / 2); + } else { + LinesInDETC = 0; + FullDETBufferingTimeC = 999999; + } + + ActiveDRAMClockChangeLatencyMarginY = HTotal[k] / PixelClock[k] + * DPPOutputBufferLinesY + EffectiveLBLatencyHidingY + + FullDETBufferingTimeY[k] - *DRAMClockChangeWatermark; + + if (NumberOfActivePlanes > 1) { + ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY + - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; + } + + if (BytePerPixelDETC[k] > 0) { + ActiveDRAMClockChangeLatencyMarginC = HTotal[k] / PixelClock[k] + * DPPOutputBufferLinesC + EffectiveLBLatencyHidingC + + FullDETBufferingTimeC - *DRAMClockChangeWatermark; + if (NumberOfActivePlanes > 1) { + ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC + - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / (VRatio[k] / 2); + } + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( + ActiveDRAMClockChangeLatencyMarginY, + ActiveDRAMClockChangeLatencyMarginC); + } else { + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; + } + + if (WritebackEnable[k] == true) { + if (WritebackPixelFormat[k] == dm_444_32) { + WritebackDRAMClockChangeLatencyMargin = (WritebackInterfaceLumaBufferSize + + WritebackInterfaceChromaBufferSize) / (WritebackDestinationWidth[k] + * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] + / PixelClock[k]) * 4) - *WritebackDRAMClockChangeWatermark; + } else { + WritebackDRAMClockChangeLatencyMargin = dml_min( + WritebackInterfaceLumaBufferSize * 8.0 / 10, + 2 * WritebackInterfaceChromaBufferSize * 8.0 / 10) / (WritebackDestinationWidth[k] + * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k])) + - *WritebackDRAMClockChangeWatermark; + } + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], + WritebackDRAMClockChangeLatencyMargin); + } + } + + mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999; + PlaneWithMinActiveDRAMClockChangeMargin = 0; + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] + < mode_lib->vba.MinActiveDRAMClockChangeMargin) { + mode_lib->vba.MinActiveDRAMClockChangeMargin = + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; + if (BlendingAndTiming[k] == k) { + PlaneWithMinActiveDRAMClockChangeMargin = k; + } else { + for (j = 0; j < NumberOfActivePlanes; ++j) { + if (BlendingAndTiming[k] == j) { + PlaneWithMinActiveDRAMClockChangeMargin = j; + } + } + } + } + } + + *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency; + + SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) + && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) + && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] + < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { + SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; + } + } + + mode_lib->vba.TotalNumberOfActiveOTG = 0; + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (BlendingAndTiming[k] == k) { + mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1; + } + } + + if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { + *DRAMClockChangeSupport = dm_dram_clock_change_vactive; + } else if (((mode_lib->vba.SynchronizedVBlank == true + || mode_lib->vba.TotalNumberOfActiveOTG == 1 + || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) + && PrefetchMode == 0)) { + *DRAMClockChangeSupport = dm_dram_clock_change_vblank; + } else { + *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; + } + + FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0]; + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) { + TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] + - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) + * (HTotal[k] / PixelClock[k]) / VRatio[k]; + } + } + + *StutterExitWatermark = SRExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark + + ExtraLatency + 10 / DCFCLKDeepSleep; + *StutterEnterPlusExitWatermark = dml_max( + SREnterPlusExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark + + ExtraLatency + 10 / DCFCLKDeepSleep, + TimeToFinishSwathTransferStutterCriticalPlane); + +} + +static void CalculateDCFCLKDeepSleep( + struct display_mode_lib *mode_lib, + unsigned int NumberOfActivePlanes, + double BytePerPixelDETY[], + double BytePerPixelDETC[], + double VRatio[], + unsigned int SwathWidthY[], + int DPPPerPlane[], + double HRatio[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double DPPCLK[], + double *DCFCLKDeepSleep) +{ + uint k; + double DisplayPipeLineDeliveryTimeLuma; + double DisplayPipeLineDeliveryTimeChroma; + //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX]; + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (VRatio[k] <= 1) { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] + / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] + / DPPCLK[k]; + } + if (BytePerPixelDETC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma = 0; + } else { + if (VRatio[k] / 2 <= 1) { + DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0 + * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0 + / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; + } + } + + if (BytePerPixelDETC[k] > 0) { + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max( + 1.1 * SwathWidthY[k] * dml_ceil(BytePerPixelDETY[k], 1) + / 32.0 / DisplayPipeLineDeliveryTimeLuma, + 1.1 * SwathWidthY[k] / 2.0 + * dml_ceil(BytePerPixelDETC[k], 2) / 32.0 + / DisplayPipeLineDeliveryTimeChroma); + } else { + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] + * dml_ceil(BytePerPixelDETY[k], 1) / 64.0 + / DisplayPipeLineDeliveryTimeLuma; + } + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max( + mode_lib->vba.DCFCLKDeepSleepPerPlane[k], + PixelClock[k] / 16); + + } + + *DCFCLKDeepSleep = 8; + for (k = 0; k < NumberOfActivePlanes; ++k) { + *DCFCLKDeepSleep = dml_max( + *DCFCLKDeepSleep, + mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); + } +} + +static void CalculateDETBufferSize( + double DETBufferSizeInKByte, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double *DETBufferSizeY, + double *DETBufferSizeC) +{ + if (SwathHeightC == 0) { + *DETBufferSizeY = DETBufferSizeInKByte * 1024; + *DETBufferSizeC = 0; + } else if (SwathHeightY <= SwathHeightC) { + *DETBufferSizeY = DETBufferSizeInKByte * 1024 / 2; + *DETBufferSizeC = DETBufferSizeInKByte * 1024 / 2; + } else { + *DETBufferSizeY = DETBufferSizeInKByte * 1024 * 2 / 3; + *DETBufferSizeC = DETBufferSizeInKByte * 1024 / 3; + } +} + +static void CalculateUrgentBurstFactor( + unsigned int DETBufferSizeInKByte, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + unsigned int SwathWidthY, + double LineTime, + double UrgentLatency, + double CursorBufferSize, + unsigned int CursorWidth, + unsigned int CursorBPP, + double VRatio, + double VRatioPreY, + double VRatioPreC, + double BytePerPixelInDETY, + double BytePerPixelInDETC, + double *UrgentBurstFactorCursor, + double *UrgentBurstFactorCursorPre, + double *UrgentBurstFactorLuma, + double *UrgentBurstFactorLumaPre, + double *UrgentBurstFactorChroma, + double *UrgentBurstFactorChromaPre, + unsigned int *NotEnoughUrgentLatencyHiding, + unsigned int *NotEnoughUrgentLatencyHidingPre) +{ + double LinesInDETLuma; + double LinesInDETChroma; + unsigned int LinesInCursorBuffer; + double CursorBufferSizeInTime; + double CursorBufferSizeInTimePre; + double DETBufferSizeInTimeLuma; + double DETBufferSizeInTimeLumaPre; + double DETBufferSizeInTimeChroma; + double DETBufferSizeInTimeChromaPre; + double DETBufferSizeY; + double DETBufferSizeC; + + *NotEnoughUrgentLatencyHiding = 0; + *NotEnoughUrgentLatencyHidingPre = 0; + + if (CursorWidth > 0) { + LinesInCursorBuffer = 1 << (unsigned int) dml_floor( + dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); + CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; + if (CursorBufferSizeInTime - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorCursor = 0; + } else { + *UrgentBurstFactorCursor = CursorBufferSizeInTime + / (CursorBufferSizeInTime - UrgentLatency); + } + if (VRatioPreY > 0) { + CursorBufferSizeInTimePre = LinesInCursorBuffer * LineTime / VRatioPreY; + if (CursorBufferSizeInTimePre - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHidingPre = 1; + *UrgentBurstFactorCursorPre = 0; + } else { + *UrgentBurstFactorCursorPre = CursorBufferSizeInTimePre + / (CursorBufferSizeInTimePre - UrgentLatency); + } + } else { + *UrgentBurstFactorCursorPre = 1; + } + } + + CalculateDETBufferSize( + DETBufferSizeInKByte, + SwathHeightY, + SwathHeightC, + &DETBufferSizeY, + &DETBufferSizeC); + + LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / SwathWidthY; + DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; + if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorLuma = 0; + } else { + *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma + / (DETBufferSizeInTimeLuma - UrgentLatency); + } + if (VRatioPreY > 0) { + DETBufferSizeInTimeLumaPre = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime + / VRatioPreY; + if (DETBufferSizeInTimeLumaPre - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHidingPre = 1; + *UrgentBurstFactorLumaPre = 0; + } else { + *UrgentBurstFactorLumaPre = DETBufferSizeInTimeLumaPre + / (DETBufferSizeInTimeLumaPre - UrgentLatency); + } + } else { + *UrgentBurstFactorLumaPre = 1; + } + + if (BytePerPixelInDETC > 0) { + LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / (SwathWidthY / 2); + DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime + / (VRatio / 2); + if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorChroma = 0; + } else { + *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma + / (DETBufferSizeInTimeChroma - UrgentLatency); + } + if (VRatioPreC > 0) { + DETBufferSizeInTimeChromaPre = dml_floor(LinesInDETChroma, SwathHeightC) + * LineTime / VRatioPreC; + if (DETBufferSizeInTimeChromaPre - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHidingPre = 1; + *UrgentBurstFactorChromaPre = 0; + } else { + *UrgentBurstFactorChromaPre = DETBufferSizeInTimeChromaPre + / (DETBufferSizeInTimeChromaPre - UrgentLatency); + } + } else { + *UrgentBurstFactorChromaPre = 1; + } + } +} + +static void CalculatePixelDeliveryTimes( + unsigned int NumberOfActivePlanes, + double VRatio[], + double VRatioPrefetchY[], + double VRatioPrefetchC[], + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + int DPPPerPlane[], + double HRatio[], + double PixelClock[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double DPPCLK[], + double BytePerPixelDETC[], + enum scan_direction_class SourceScan[], + unsigned int BlockWidth256BytesY[], + unsigned int BlockHeight256BytesY[], + unsigned int BlockWidth256BytesC[], + unsigned int BlockHeight256BytesC[], + double DisplayPipeLineDeliveryTimeLuma[], + double DisplayPipeLineDeliveryTimeChroma[], + double DisplayPipeLineDeliveryTimeLumaPrefetch[], + double DisplayPipeLineDeliveryTimeChromaPrefetch[], + double DisplayPipeRequestDeliveryTimeLuma[], + double DisplayPipeRequestDeliveryTimeChroma[], + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], + double DisplayPipeRequestDeliveryTimeChromaPrefetch[]) +{ + double req_per_swath_ub; + uint k; + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (VRatio[k] <= 1) { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] + / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] + / PSCL_THROUGHPUT[k] / DPPCLK[k]; + } + + if (BytePerPixelDETC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma[k] = 0; + } else { + if (VRatio[k] / 2 <= 1) { + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] + * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] + / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; + } + } + + if (VRatioPrefetchY[k] <= 1) { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] + * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] + / PSCL_THROUGHPUT[k] / DPPCLK[k]; + } + + if (BytePerPixelDETC[k] == 0) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (VRatioPrefetchC[k] <= 1) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = + swath_width_chroma_ub[k] * DPPPerPlane[k] + / (HRatio[k] / 2) / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = + swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; + } + } + } + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (SourceScan[k] == dm_horz) { + req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; + } else { + req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; + } + DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] + / req_per_swath_ub; + DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = + DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; + if (BytePerPixelDETC[k] == 0) { + DisplayPipeRequestDeliveryTimeChroma[k] = 0; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (SourceScan[k] == dm_horz) { + req_per_swath_ub = swath_width_chroma_ub[k] + / BlockWidth256BytesC[k]; + } else { + req_per_swath_ub = swath_width_chroma_ub[k] + / BlockHeight256BytesC[k]; + } + DisplayPipeRequestDeliveryTimeChroma[k] = + DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = + DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; + } + } +} + +static void CalculateMetaAndPTETimes( + unsigned int NumberOfActivePlanes, + bool GPUVMEnable, + unsigned int MetaChunkSize, + unsigned int MinMetaChunkSizeBytes, + unsigned int GPUVMMaxPageTableLevels, + unsigned int HTotal[], + double VRatio[], + double VRatioPrefetchY[], + double VRatioPrefetchC[], + double DestinationLinesToRequestRowInVBlank[], + double DestinationLinesToRequestRowInImmediateFlip[], + double DestinationLinesToRequestVMInVBlank[], + double DestinationLinesToRequestVMInImmediateFlip[], + bool DCCEnable[], + double PixelClock[], + double BytePerPixelDETY[], + double BytePerPixelDETC[], + enum scan_direction_class SourceScan[], + unsigned int dpte_row_height[], + unsigned int dpte_row_height_chroma[], + unsigned int meta_row_width[], + unsigned int meta_row_height[], + unsigned int meta_req_width[], + unsigned int meta_req_height[], + long dpte_group_bytes[], + unsigned int PTERequestSizeY[], + unsigned int PTERequestSizeC[], + unsigned int PixelPTEReqWidthY[], + unsigned int PixelPTEReqHeightY[], + unsigned int PixelPTEReqWidthC[], + unsigned int PixelPTEReqHeightC[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + unsigned int vm_group_bytes[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + unsigned int meta_pte_bytes_per_frame_ub_l[], + unsigned int meta_pte_bytes_per_frame_ub_c[], + double DST_Y_PER_PTE_ROW_NOM_L[], + double DST_Y_PER_PTE_ROW_NOM_C[], + double DST_Y_PER_META_ROW_NOM_L[], + double TimePerMetaChunkNominal[], + double TimePerMetaChunkVBlank[], + double TimePerMetaChunkFlip[], + double time_per_pte_group_nom_luma[], + double time_per_pte_group_vblank_luma[], + double time_per_pte_group_flip_luma[], + double time_per_pte_group_nom_chroma[], + double time_per_pte_group_vblank_chroma[], + double time_per_pte_group_flip_chroma[], + double TimePerVMGroupVBlank[], + double TimePerVMGroupFlip[], + double TimePerVMRequestVBlank[], + double TimePerVMRequestFlip[]) +{ + unsigned int meta_chunk_width; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int meta_chunks_per_row_ub; + unsigned int dpte_group_width_luma; + unsigned int dpte_group_width_chroma; + unsigned int dpte_groups_per_row_luma_ub; + unsigned int dpte_groups_per_row_chroma_ub; + unsigned int num_group_per_lower_vm_stage; + unsigned int num_req_per_lower_vm_stage; + uint k; + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (GPUVMEnable == true) { + DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; + if (BytePerPixelDETC[k] == 0) { + DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } else { + DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / (VRatio[k] / 2); + } + } else { + DST_Y_PER_PTE_ROW_NOM_L[k] = 0; + DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } + if (DCCEnable[k] == true) { + DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; + } else { + DST_Y_PER_META_ROW_NOM_L[k] = 0; + } + } + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (DCCEnable[k] == true) { + meta_chunk_width = MetaChunkSize * 1024 * 256 + / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k]; + min_meta_chunk_width = MinMetaChunkSizeBytes * 256 + / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k]; + meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; + meta_row_remainder = meta_row_width[k] % meta_chunk_width; + if (SourceScan[k] == dm_horz) { + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; + } else { + meta_chunk_threshold = 2 * min_meta_chunk_width + - meta_req_height[k]; + } + if (meta_row_remainder <= meta_chunk_threshold) { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + } else { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + } + TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] + / PixelClock[k] / meta_chunks_per_row_ub; + TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] + * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; + TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] + * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; + } else { + TimePerMetaChunkNominal[k] = 0; + TimePerMetaChunkVBlank[k] = 0; + TimePerMetaChunkFlip[k] = 0; + } + } + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (GPUVMEnable == true) { + if (SourceScan[k] == dm_horz) { + dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] + * PixelPTEReqWidthY[k]; + } else { + dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] + * PixelPTEReqHeightY[k]; + } + dpte_groups_per_row_luma_ub = dml_ceil( + dpte_row_width_luma_ub[k] / dpte_group_width_luma, + 1); + time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] + / PixelClock[k] / dpte_groups_per_row_luma_ub; + time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] + * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; + time_per_pte_group_flip_luma[k] = + DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] + / PixelClock[k] + / dpte_groups_per_row_luma_ub; + if (BytePerPixelDETC[k] == 0) { + time_per_pte_group_nom_chroma[k] = 0; + time_per_pte_group_vblank_chroma[k] = 0; + time_per_pte_group_flip_chroma[k] = 0; + } else { + if (SourceScan[k] == dm_horz) { + dpte_group_width_chroma = dpte_group_bytes[k] + / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; + } else { + dpte_group_width_chroma = dpte_group_bytes[k] + / PTERequestSizeC[k] + * PixelPTEReqHeightC[k]; + } + dpte_groups_per_row_chroma_ub = dml_ceil( + dpte_row_width_chroma_ub[k] + / dpte_group_width_chroma, + 1); + time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] + * HTotal[k] / PixelClock[k] + / dpte_groups_per_row_chroma_ub; + time_per_pte_group_vblank_chroma[k] = + DestinationLinesToRequestRowInVBlank[k] * HTotal[k] + / PixelClock[k] + / dpte_groups_per_row_chroma_ub; + time_per_pte_group_flip_chroma[k] = + DestinationLinesToRequestRowInImmediateFlip[k] + * HTotal[k] / PixelClock[k] + / dpte_groups_per_row_chroma_ub; + } + } else { + time_per_pte_group_nom_luma[k] = 0; + time_per_pte_group_vblank_luma[k] = 0; + time_per_pte_group_flip_luma[k] = 0; + time_per_pte_group_nom_chroma[k] = 0; + time_per_pte_group_vblank_chroma[k] = 0; + time_per_pte_group_flip_chroma[k] = 0; + } + } + + for (k = 0; k < NumberOfActivePlanes; ++k) { + if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { + if (DCCEnable[k] == false) { + if (BytePerPixelDETC[k] > 0) { + num_group_per_lower_vm_stage = + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); + } else { + num_group_per_lower_vm_stage = + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); + } + } else { + if (GPUVMMaxPageTableLevels == 1) { + if (BytePerPixelDETC[k] > 0) { + num_group_per_lower_vm_stage = + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); + } else { + num_group_per_lower_vm_stage = + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); + } + } else { + if (BytePerPixelDETC[k] > 0) { + num_group_per_lower_vm_stage = + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); + } else { + num_group_per_lower_vm_stage = + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); + } + } + } + + if (DCCEnable[k] == false) { + if (BytePerPixelDETC[k] > 0) { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] + / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] + / 64; + } + } else { + if (GPUVMMaxPageTableLevels == 1) { + if (BytePerPixelDETC[k] > 0) { + num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + + meta_pte_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; + } + } else { + if (BytePerPixelDETC[k] > 0) { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + + dpde0_bytes_per_frame_ub_c[k] / 64 + + meta_pte_bytes_per_frame_ub_l[k] / 64 + + meta_pte_bytes_per_frame_ub_c[k] / 64; + } else { + num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + + meta_pte_bytes_per_frame_ub_l[k] / 64; + } + } + } + + TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] + / PixelClock[k] / num_group_per_lower_vm_stage; + TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] + * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; + TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] + * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; + TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] + * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; + + if (GPUVMMaxPageTableLevels > 2) { + TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; + TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; + TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; + TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; + } + + } else { + TimePerVMGroupVBlank[k] = 0; + TimePerVMGroupFlip[k] = 0; + TimePerVMRequestVBlank[k] = 0; + TimePerVMRequestFlip[k] = 0; + } + } +} + +static double CalculateExtraLatency( + double UrgentRoundTripAndOutOfOrderLatency, + int TotalNumberOfActiveDPP, + int PixelChunkSizeInKByte, + int TotalNumberOfDCCActiveDPP, + int MetaChunkSize, + double ReturnBW, + bool GPUVMEnable, + bool HostVMEnable, + int NumberOfActivePlanes, + int NumberOfDPP[], + long dpte_group_bytes[], + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, + double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + int HostVMMaxPageTableLevels, + int HostVMCachedPageTableLevels) +{ + double CalculateExtraLatency; + double HostVMInefficiencyFactor; + int HostVMDynamicLevels; + + if (GPUVMEnable && HostVMEnable) { + HostVMInefficiencyFactor = + PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData + / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; + HostVMDynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels; + } else { + HostVMInefficiencyFactor = 1; + HostVMDynamicLevels = 0; + } + + CalculateExtraLatency = UrgentRoundTripAndOutOfOrderLatency + + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0 + / ReturnBW; + + if (GPUVMEnable) { + int k; + + for (k = 0; k < NumberOfActivePlanes; k++) { + CalculateExtraLatency = CalculateExtraLatency + + NumberOfDPP[k] * dpte_group_bytes[k] + * (1 + 8 * HostVMDynamicLevels) + * HostVMInefficiencyFactor / ReturnBW; + } + } + return CalculateExtraLatency; +} + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h new file mode 100644 index 000000000000..fb9548a2f894 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h @@ -0,0 +1,32 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML21_DISPLAY_MODE_VBA_H__ +#define __DML21_DISPLAY_MODE_VBA_H__ + +void dml21_recalculate(struct display_mode_lib *mode_lib); +void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); + +#endif /* _DML21_DISPLAY_MODE_VBA_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c new file mode 100644 index 000000000000..a1f207cbb966 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -0,0 +1,1823 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifdef CONFIG_DRM_AMD_DC_DCN2_0 + +#include "../display_mode_lib.h" +#include "../display_mode_vba.h" +#include "../dml_inline_defs.h" +#include "display_rq_dlg_calc_21.h" + +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +static void calculate_ttu_cursor( + struct display_mode_lib *mode_lib, + double *refcyc_per_req_delivery_pre_cur, + double *refcyc_per_req_delivery_cur, + double refclk_freq_in_mhz, + double ref_freq_to_pix_freq, + double hscale_pixel_rate_l, + double hscl_ratio, + double vratio_pre_l, + double vratio_l, + unsigned int cur_width, + enum cursor_bpp cur_bpp); + +static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) +{ + unsigned int ret_val = 0; + + if (source_format == dm_444_16) { + if (!is_chroma) + ret_val = 2; + } else if (source_format == dm_444_32) { + if (!is_chroma) + ret_val = 4; + } else if (source_format == dm_444_64) { + if (!is_chroma) + ret_val = 8; + } else if (source_format == dm_420_8) { + if (is_chroma) + ret_val = 2; + else + ret_val = 1; + } else if (source_format == dm_420_10) { + if (is_chroma) + ret_val = 4; + else + ret_val = 2; + } else if (source_format == dm_444_8) { + ret_val = 1; + } + return ret_val; +} + +static bool is_dual_plane(enum source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dm_420_8) || (source_format == dm_420_10)) + ret_val = 1; + + return ret_val; +} + +static double get_refcyc_per_delivery( + struct display_mode_lib *mode_lib, + double refclk_freq_in_mhz, + double pclk_freq_in_mhz, + bool odm_combine, + unsigned int recout_width, + unsigned int hactive, + double vratio, + double hscale_pixel_rate, + unsigned int delivery_width, + unsigned int req_per_swath_ub) +{ + double refcyc_per_delivery = 0.0; + + if (vratio <= 1.0) { + if (odm_combine) + refcyc_per_delivery = (double) refclk_freq_in_mhz + * dml_min((double) recout_width, (double) hactive / 2.0) + / pclk_freq_in_mhz / (double) req_per_swath_ub; + else + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width + / pclk_freq_in_mhz / (double) req_per_swath_ub; + } else { + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width + / (double) hscale_pixel_rate / (double) req_per_swath_ub; + } + + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width); + dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio); + dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub); + dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery); + + return refcyc_per_delivery; + +} + +static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) +{ + if (tile_size == dm_256k_tile) + return (256 * 1024); + else if (tile_size == dm_64k_tile) + return (64 * 1024); + else + return (4 * 1024); +} + +static void extract_rq_sizing_regs( + struct display_mode_lib *mode_lib, + display_data_rq_regs_st *rq_regs, + const display_data_rq_sizing_params_st rq_sizing) +{ + dml_print("DML_DLG: %s: rq_sizing param\n", __func__); + print__data_rq_sizing_params_st(mode_lib, rq_sizing); + + rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10; + + if (rq_sizing.min_chunk_bytes == 0) + rq_regs->min_chunk_size = 0; + else + rq_regs->min_chunk_size = dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1; + + rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10; + if (rq_sizing.min_meta_chunk_bytes == 0) + rq_regs->min_meta_chunk_size = 0; + else + rq_regs->min_meta_chunk_size = dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1; + + rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6; + rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6; +} + +static void extract_rq_regs( + struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_rq_params_st rq_param) +{ + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + unsigned int detile_buf_plane1_addr = 0; + + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l); + + rq_regs->rq_regs_l.pte_row_height_linear = dml_floor( + dml_log2(rq_param.dlg.rq_l.dpte_row_height), + 1) - 3; + + if (rq_param.yuv420) { + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c); + rq_regs->rq_regs_c.pte_row_height_linear = dml_floor( + dml_log2(rq_param.dlg.rq_c.dpte_row_height), + 1) - 3; + } + + rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height); + rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height); + + // FIXME: take the max between luma, chroma chunk size? + // okay for now, as we are setting chunk_bytes to 8kb anyways + if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + + if (rq_param.yuv420) { + if ((double) rq_param.misc.rq_l.stored_swath_bytes + / (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5) { + detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); // half to chroma + } else { + detile_buf_plane1_addr = dml_round_to_multiple( + (unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), + 256, + 0) / 64.0; // 2/3 to chroma + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; +} + +static void handle_det_buf_split( + struct display_mode_lib *mode_lib, + display_rq_params_st *rq_param, + const display_pipe_source_params_st pipe_src_param) +{ + unsigned int total_swath_bytes = 0; + unsigned int swath_bytes_l = 0; + unsigned int swath_bytes_c = 0; + unsigned int full_swath_bytes_packed_l = 0; + unsigned int full_swath_bytes_packed_c = 0; + bool req128_l = 0; + bool req128_c = 0; + bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear); + bool surf_vert = (pipe_src_param.source_scan == dm_vert); + unsigned int log2_swath_height_l = 0; + unsigned int log2_swath_height_c = 0; + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + + full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes; + full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes; + + if (rq_param->yuv420_10bpc) { + full_swath_bytes_packed_l = dml_round_to_multiple( + rq_param->misc.rq_l.full_swath_bytes * 2 / 3, + 256, + 1) + 256; + full_swath_bytes_packed_c = dml_round_to_multiple( + rq_param->misc.rq_c.full_swath_bytes * 2 / 3, + 256, + 1) + 256; + } + + if (rq_param->yuv420) { + total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; + + if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request + req128_l = 0; + req128_c = 0; + swath_bytes_l = full_swath_bytes_packed_l; + swath_bytes_c = full_swath_bytes_packed_c; + } else { //128b request (for luma only for yuv420 8bpc) + req128_l = 1; + req128_c = 0; + swath_bytes_l = full_swath_bytes_packed_l / 2; + swath_bytes_c = full_swath_bytes_packed_c; + } + // Note: assumption, the config that pass in will fit into + // the detiled buffer. + } else { + total_swath_bytes = 2 * full_swath_bytes_packed_l; + + if (total_swath_bytes <= detile_buf_size_in_bytes) + req128_l = 0; + else + req128_l = 1; + + swath_bytes_l = total_swath_bytes; + swath_bytes_c = 0; + } + rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l; + rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c; + + if (surf_linear) { + log2_swath_height_l = 0; + log2_swath_height_c = 0; + } else if (!surf_vert) { + log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l; + log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c; + } else { + log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l; + log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c; + } + rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; + rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c; + + dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l); + dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c); + dml_print( + "DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", + __func__, + full_swath_bytes_packed_l); + dml_print( + "DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", + __func__, + full_swath_bytes_packed_c); +} + +static void get_meta_and_pte_attr( + struct display_mode_lib *mode_lib, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + display_data_rq_sizing_params_st *rq_sizing_param, + unsigned int vp_width, + unsigned int vp_height, + unsigned int data_pitch, + unsigned int meta_pitch, + unsigned int source_format, + unsigned int tiling, + unsigned int macro_tile_size, + unsigned int source_scan, + unsigned int hostvm_enable, + unsigned int is_chroma) +{ + bool surf_linear = (tiling == dm_sw_linear); + bool surf_vert = (source_scan == dm_vert); + + unsigned int bytes_per_element; + unsigned int bytes_per_element_y = get_bytes_per_element( + (enum source_format_class) (source_format), + false); + unsigned int bytes_per_element_c = get_bytes_per_element( + (enum source_format_class) (source_format), + true); + + unsigned int blk256_width = 0; + unsigned int blk256_height = 0; + + unsigned int blk256_width_y = 0; + unsigned int blk256_height_y = 0; + unsigned int blk256_width_c = 0; + unsigned int blk256_height_c = 0; + unsigned int log2_bytes_per_element; + unsigned int log2_blk256_width; + unsigned int log2_blk256_height; + unsigned int blk_bytes; + unsigned int log2_blk_bytes; + unsigned int log2_blk_height; + unsigned int log2_blk_width; + unsigned int log2_meta_req_bytes; + unsigned int log2_meta_req_height; + unsigned int log2_meta_req_width; + unsigned int meta_req_width; + unsigned int meta_req_height; + unsigned int log2_meta_row_height; + unsigned int meta_row_width_ub; + unsigned int log2_meta_chunk_bytes; + unsigned int log2_meta_chunk_height; + + //full sized meta chunk width in unit of data elements + unsigned int log2_meta_chunk_width; + unsigned int log2_min_meta_chunk_bytes; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int meta_blk_bytes; + unsigned int meta_blk_height; + unsigned int meta_blk_width; + unsigned int meta_surface_bytes; + unsigned int vmpg_bytes; + unsigned int meta_pte_req_per_frame_ub; + unsigned int meta_pte_bytes_per_frame_ub; + const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); + const unsigned int dpte_buf_in_pte_reqs = + mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma + mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma; + const unsigned int pde_proc_buffer_size_64k_reqs = + mode_lib->ip.pde_proc_buffer_size_64k_reqs; + + unsigned int log2_vmpg_height = 0; + unsigned int log2_vmpg_width = 0; + unsigned int log2_dpte_req_height_ptes = 0; + unsigned int log2_dpte_req_height = 0; + unsigned int log2_dpte_req_width = 0; + unsigned int log2_dpte_row_height_linear = 0; + unsigned int log2_dpte_row_height = 0; + unsigned int log2_dpte_group_width = 0; + unsigned int dpte_row_width_ub = 0; + unsigned int dpte_req_height = 0; + unsigned int dpte_req_width = 0; + unsigned int dpte_group_width = 0; + unsigned int log2_dpte_group_bytes = 0; + unsigned int log2_dpte_group_length = 0; + unsigned int pde_buf_entries; + bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10); + + Calculate256BBlockSizes( + (enum source_format_class) (source_format), + (enum dm_swizzle_mode) (tiling), + bytes_per_element_y, + bytes_per_element_c, + &blk256_height_y, + &blk256_height_c, + &blk256_width_y, + &blk256_width_c); + + if (!is_chroma) { + blk256_width = blk256_width_y; + blk256_height = blk256_height_y; + bytes_per_element = bytes_per_element_y; + } else { + blk256_width = blk256_width_c; + blk256_height = blk256_height_c; + bytes_per_element = bytes_per_element_c; + } + + log2_bytes_per_element = dml_log2(bytes_per_element); + + dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear); + dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert); + dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width); + dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height); + + log2_blk256_width = dml_log2((double) blk256_width); + log2_blk256_height = dml_log2((double) blk256_height); + blk_bytes = surf_linear ? + 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); + log2_blk_bytes = dml_log2((double) blk_bytes); + log2_blk_height = 0; + log2_blk_width = 0; + + // remember log rule + // "+" in log is multiply + // "-" in log is divide + // "/2" is like square root + // blk is vertical biased + if (tiling != dm_sw_linear) + log2_blk_height = log2_blk256_height + + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); + else + log2_blk_height = 0; // blk height of 1 + + log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; + + if (!surf_vert) { + rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1) + + blk256_width; + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width; + } else { + rq_dlg_param->swath_width_ub = dml_round_to_multiple( + vp_height - 1, + blk256_height, + 1) + blk256_height; + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height; + } + + if (!surf_vert) + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height + * bytes_per_element; + else + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width + * bytes_per_element; + + rq_misc_param->blk256_height = blk256_height; + rq_misc_param->blk256_width = blk256_width; + + // ------- + // meta + // ------- + log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element + + // each 64b meta request for dcn is 8x8 meta elements and + // a meta element covers one 256b block of the the data surface. + log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 + log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element + - log2_meta_req_height; + meta_req_width = 1 << log2_meta_req_width; + meta_req_height = 1 << log2_meta_req_height; + log2_meta_row_height = 0; + meta_row_width_ub = 0; + + // the dimensions of a meta row are meta_row_width x meta_row_height in elements. + // calculate upper bound of the meta_row_width + if (!surf_vert) { + log2_meta_row_height = log2_meta_req_height; + meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) + + meta_req_width; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width; + } else { + log2_meta_row_height = log2_meta_req_width; + meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) + + meta_req_height; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height; + } + rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; + + rq_dlg_param->meta_row_height = 1 << log2_meta_row_height; + + log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes); + log2_meta_chunk_height = log2_meta_row_height; + + //full sized meta chunk width in unit of data elements + log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element + - log2_meta_chunk_height; + log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes); + min_meta_chunk_width = 1 + << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element + - log2_meta_chunk_height); + meta_chunk_width = 1 << log2_meta_chunk_width; + meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width); + meta_row_remainder = meta_row_width_ub % meta_chunk_width; + meta_chunk_threshold = 0; + meta_blk_bytes = 4096; + meta_blk_height = blk256_height * 64; + meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height; + meta_surface_bytes = meta_pitch + * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + + meta_blk_height) * bytes_per_element / 256; + vmpg_bytes = mode_lib->soc.vmm_page_size_bytes; + meta_pte_req_per_frame_ub = (dml_round_to_multiple( + meta_surface_bytes - vmpg_bytes, + 8 * vmpg_bytes, + 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes); + meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request + rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub; + + dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height); + dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width); + dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes); + dml_print( + "DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", + __func__, + meta_pte_req_per_frame_ub); + dml_print( + "DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", + __func__, + meta_pte_bytes_per_frame_ub); + + if (!surf_vert) + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width; + else + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height; + + if (meta_row_remainder <= meta_chunk_threshold) + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + else + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + + // ------ + // dpte + // ------ + if (surf_linear) { + log2_vmpg_height = 0; // one line high + } else { + log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; + } + log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; + + // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. + if (surf_linear) { //one 64B PTE request returns 8 PTEs + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_vmpg_width + 3; + log2_dpte_req_height = 0; + } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size + //one 64B req gives 8x1 PTEs for 4KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB + //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB + log2_dpte_req_height_ptes = 4; + log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width + log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height + } else { //64KB page size and must 64KB tile block + //one 64B req gives 8x1 PTEs for 64KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } + + // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height + // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent + // That depends on the pte shape (i.e. 8x1, 4x2, 2x4) + //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; + //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; + dpte_req_height = 1 << log2_dpte_req_height; + dpte_req_width = 1 << log2_dpte_req_width; + + // calculate pitch dpte row buffer can hold + // round the result down to a power of two. + pde_buf_entries = + yuv420 ? (pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs; + if (surf_linear) { + unsigned int dpte_row_height; + + log2_dpte_row_height_linear = dml_floor( + dml_log2( + dml_min( + 64 * 1024 * pde_buf_entries + / bytes_per_element, + dpte_buf_in_pte_reqs + * dpte_req_width) + / data_pitch), + 1); + + ASSERT(log2_dpte_row_height_linear >= 3); + + if (log2_dpte_row_height_linear > 7) + log2_dpte_row_height_linear = 7; + + log2_dpte_row_height = log2_dpte_row_height_linear; + // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary. + // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering. + dpte_row_height = 1 << log2_dpte_row_height; + dpte_row_width_ub = dml_round_to_multiple( + data_pitch * dpte_row_height - 1, + dpte_req_width, + 1) + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + // the upper bound of the dpte_row_width without dependency on viewport position follows. + // for tiled mode, row height is the same as req height and row store up to vp size upper bound + if (!surf_vert) { + log2_dpte_row_height = log2_dpte_req_height; + dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) + + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + log2_dpte_row_height = + (log2_blk_width < log2_dpte_req_width) ? + log2_blk_width : log2_dpte_req_width; + dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) + + dpte_req_height; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height; + } + } + if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request + else + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request + + rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; + + // the dpte_group_bytes is reduced for the specific case of vertical + // access of a tile surface that has dpte request of 8x1 ptes. + + if (hostvm_enable) + rq_sizing_param->dpte_group_bytes = 512; + else { + if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + rq_sizing_param->dpte_group_bytes = 512; + else + //full size + rq_sizing_param->dpte_group_bytes = 2048; + } + + //since pte request size is 64byte, the number of data pte requests per full sized group is as follows. + log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes); + log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests + + // full sized data pte group width in elements + if (!surf_vert) + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width; + else + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height; + + //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B + if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB + log2_dpte_group_width = log2_dpte_group_width - 1; + + dpte_group_width = 1 << log2_dpte_group_width; + + // since dpte groups are only aligned to dpte_req_width and not dpte_group_width, + // the upper bound for the dpte groups per row is as follows. + rq_dlg_param->dpte_groups_per_row_ub = dml_ceil( + (double) dpte_row_width_ub / dpte_group_width, + 1); +} + +static void get_surf_rq_param( + struct display_mode_lib *mode_lib, + display_data_rq_sizing_params_st *rq_sizing_param, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + const display_pipe_params_st pipe_param, + bool is_chroma) +{ + bool mode_422 = 0; + unsigned int vp_width = 0; + unsigned int vp_height = 0; + unsigned int data_pitch = 0; + unsigned int meta_pitch = 0; + unsigned int ppe = mode_422 ? 2 : 1; + + // FIXME check if ppe apply for both luma and chroma in 422 case + if (is_chroma) { + vp_width = pipe_param.src.viewport_width_c / ppe; + vp_height = pipe_param.src.viewport_height_c; + data_pitch = pipe_param.src.data_pitch_c; + meta_pitch = pipe_param.src.meta_pitch_c; + } else { + vp_width = pipe_param.src.viewport_width / ppe; + vp_height = pipe_param.src.viewport_height; + data_pitch = pipe_param.src.data_pitch; + meta_pitch = pipe_param.src.meta_pitch; + } + + if (pipe_param.dest.odm_combine) { + unsigned int access_dir; + unsigned int full_src_vp_width; + unsigned int hactive_half; + unsigned int src_hactive_half; + access_dir = (pipe_param.src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed + hactive_half = pipe_param.dest.hactive / 2; + if (is_chroma) { + full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio_c * pipe_param.dest.full_recout_width; + src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio_c * hactive_half; + } else { + full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio * pipe_param.dest.full_recout_width; + src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio * hactive_half; + } + + if (access_dir == 0) { + vp_width = dml_min(full_src_vp_width, src_hactive_half); + dml_print("DML_DLG: %s: vp_width = %d\n", __func__, vp_width); + } else { + vp_height = dml_min(full_src_vp_width, src_hactive_half); + dml_print("DML_DLG: %s: vp_height = %d\n", __func__, vp_height); + + } + dml_print("DML_DLG: %s: full_src_vp_width = %d\n", __func__, full_src_vp_width); + dml_print("DML_DLG: %s: hactive_half = %d\n", __func__, hactive_half); + dml_print("DML_DLG: %s: src_hactive_half = %d\n", __func__, src_hactive_half); + } + rq_sizing_param->chunk_bytes = 8192; + + if (rq_sizing_param->chunk_bytes == 64 * 1024) + rq_sizing_param->min_chunk_bytes = 0; + else + rq_sizing_param->min_chunk_bytes = 1024; + + rq_sizing_param->meta_chunk_bytes = 2048; + rq_sizing_param->min_meta_chunk_bytes = 256; + + if (pipe_param.src.hostvm) + rq_sizing_param->mpte_group_bytes = 512; + else + rq_sizing_param->mpte_group_bytes = 2048; + + get_meta_and_pte_attr( + mode_lib, + rq_dlg_param, + rq_misc_param, + rq_sizing_param, + vp_width, + vp_height, + data_pitch, + meta_pitch, + pipe_param.src.source_format, + pipe_param.src.sw_mode, + pipe_param.src.macro_tile_size, + pipe_param.src.source_scan, + pipe_param.src.hostvm, + is_chroma); +} + +static void dml_rq_dlg_get_rq_params( + struct display_mode_lib *mode_lib, + display_rq_params_st *rq_param, + const display_pipe_params_st pipe_param) +{ + // get param for luma surface + rq_param->yuv420 = pipe_param.src.source_format == dm_420_8 + || pipe_param.src.source_format == dm_420_10; + rq_param->yuv420_10bpc = pipe_param.src.source_format == dm_420_10; + + get_surf_rq_param( + mode_lib, + &(rq_param->sizing.rq_l), + &(rq_param->dlg.rq_l), + &(rq_param->misc.rq_l), + pipe_param, + 0); + + if (is_dual_plane((enum source_format_class) (pipe_param.src.source_format))) { + // get param for chroma surface + get_surf_rq_param( + mode_lib, + &(rq_param->sizing.rq_c), + &(rq_param->dlg.rq_c), + &(rq_param->misc.rq_c), + pipe_param, + 1); + } + + // calculate how to split the det buffer space between luma and chroma + handle_det_buf_split(mode_lib, rq_param, pipe_param.src); + print__rq_params_st(mode_lib, *rq_param); +} + +void dml21_rq_dlg_get_rq_reg( + struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_pipe_params_st pipe_param) +{ + display_rq_params_st rq_param = {0}; + + memset(rq_regs, 0, sizeof(*rq_regs)); + dml_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param); + extract_rq_regs(mode_lib, rq_regs, rq_param); + + print__rq_regs_st(mode_lib, *rq_regs); +} + +// Note: currently taken in as is. +// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma. +static void dml_rq_dlg_get_dlg_params( + struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + display_dlg_regs_st *disp_dlg_regs, + display_ttu_regs_st *disp_ttu_regs, + const display_rq_dlg_params_st rq_dlg_param, + const display_dlg_sys_params_st dlg_sys_param, + const bool cstate_en, + const bool pstate_en) +{ + const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; + const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; + const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; + const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; + const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; + const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; + + // ------------------------- + // Section 1.15.2.1: OTG dependent Params + // ------------------------- + // Timing + unsigned int htotal = dst->htotal; + // unsigned int hblank_start = dst.hblank_start; // TODO: Remove + unsigned int hblank_end = dst->hblank_end; + unsigned int vblank_start = dst->vblank_start; + unsigned int vblank_end = dst->vblank_end; + unsigned int min_vblank = mode_lib->ip.min_vblank_lines; + + double dppclk_freq_in_mhz = clks->dppclk_mhz; + double dispclk_freq_in_mhz = clks->dispclk_mhz; + double refclk_freq_in_mhz = clks->refclk_mhz; + double pclk_freq_in_mhz = dst->pixel_rate_mhz; + bool interlaced = dst->interlaced; + + double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; + + double min_dcfclk_mhz; + double t_calc_us; + double min_ttu_vblank; + + double min_dst_y_ttu_vblank; + unsigned int dlg_vblank_start; + bool dual_plane; + bool mode_422; + unsigned int access_dir; + unsigned int vp_height_l; + unsigned int vp_width_l; + unsigned int vp_height_c; + unsigned int vp_width_c; + + // Scaling + unsigned int htaps_l; + unsigned int htaps_c; + double hratio_l; + double hratio_c; + double vratio_l; + double vratio_c; + bool scl_enable; + + double line_time_in_us; + // double vinit_l; + // double vinit_c; + // double vinit_bot_l; + // double vinit_bot_c; + + // unsigned int swath_height_l; + unsigned int swath_width_ub_l; + // unsigned int dpte_bytes_per_row_ub_l; + unsigned int dpte_groups_per_row_ub_l; + // unsigned int meta_pte_bytes_per_frame_ub_l; + // unsigned int meta_bytes_per_row_ub_l; + + // unsigned int swath_height_c; + unsigned int swath_width_ub_c; + // unsigned int dpte_bytes_per_row_ub_c; + unsigned int dpte_groups_per_row_ub_c; + + unsigned int meta_chunks_per_row_ub_l; + unsigned int meta_chunks_per_row_ub_c; + unsigned int vupdate_offset; + unsigned int vupdate_width; + unsigned int vready_offset; + + unsigned int dppclk_delay_subtotal; + unsigned int dispclk_delay_subtotal; + unsigned int pixel_rate_delay_subtotal; + + unsigned int vstartup_start; + unsigned int dst_x_after_scaler; + unsigned int dst_y_after_scaler; + double line_wait; + double dst_y_prefetch; + double dst_y_per_vm_vblank; + double dst_y_per_row_vblank; + double dst_y_per_vm_flip; + double dst_y_per_row_flip; + double max_dst_y_per_vm_vblank; + double max_dst_y_per_row_vblank; + double lsw; + double vratio_pre_l; + double vratio_pre_c; + unsigned int req_per_swath_ub_l; + unsigned int req_per_swath_ub_c; + unsigned int meta_row_height_l; + unsigned int meta_row_height_c; + unsigned int swath_width_pixels_ub_l; + unsigned int swath_width_pixels_ub_c; + unsigned int scaler_rec_in_width_l; + unsigned int scaler_rec_in_width_c; + unsigned int dpte_row_height_l; + unsigned int dpte_row_height_c; + double hscale_pixel_rate_l; + double hscale_pixel_rate_c; + double min_hratio_fact_l; + double min_hratio_fact_c; + double refcyc_per_line_delivery_pre_l; + double refcyc_per_line_delivery_pre_c; + double refcyc_per_line_delivery_l; + double refcyc_per_line_delivery_c; + + double refcyc_per_req_delivery_pre_l; + double refcyc_per_req_delivery_pre_c; + double refcyc_per_req_delivery_l; + double refcyc_per_req_delivery_c; + + unsigned int full_recout_width; + double xfc_transfer_delay; + double xfc_precharge_delay; + double xfc_remote_surface_flip_latency; + double xfc_dst_y_delta_drq_limit; + double xfc_prefetch_margin; + double refcyc_per_req_delivery_pre_cur0; + double refcyc_per_req_delivery_cur0; + double refcyc_per_req_delivery_pre_cur1; + double refcyc_per_req_delivery_cur1; + + memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); + memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); + + dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en); + dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en); + + dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz); + dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz); + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); + ASSERT(ref_freq_to_pix_freq < 4.0); + + disp_dlg_regs->ref_freq_to_pix_freq = + (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); + disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal + * dml_pow(2, 8)); + disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits + disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end + * (double) ref_freq_to_pix_freq); + ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13)); + + min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz; + t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes); + min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal; + dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; + + disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18)); + + dml_print( + "DML_DLG: %s: min_dcfclk_mhz = %3.2f\n", + __func__, + min_dcfclk_mhz); + dml_print( + "DML_DLG: %s: min_ttu_vblank = %3.2f\n", + __func__, + min_ttu_vblank); + dml_print( + "DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n", + __func__, + min_dst_y_ttu_vblank); + dml_print( + "DML_DLG: %s: t_calc_us = %3.2f\n", + __func__, + t_calc_us); + dml_print( + "DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n", + __func__, + disp_dlg_regs->min_dst_y_next_start); + dml_print( + "DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", + __func__, + ref_freq_to_pix_freq); + + // ------------------------- + // Section 1.15.2.2: Prefetch, Active and TTU + // ------------------------- + // Prefetch Calc + // Source + // dcc_en = src.dcc; + dual_plane = is_dual_plane((enum source_format_class) (src->source_format)); + mode_422 = 0; // FIXME + access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed + // bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0); + // bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1); + vp_height_l = src->viewport_height; + vp_width_l = src->viewport_width; + vp_height_c = src->viewport_height_c; + vp_width_c = src->viewport_width_c; + + // Scaling + htaps_l = taps->htaps; + htaps_c = taps->htaps_c; + hratio_l = scl->hscl_ratio; + hratio_c = scl->hscl_ratio_c; + vratio_l = scl->vscl_ratio; + vratio_c = scl->vscl_ratio_c; + scl_enable = scl->scl_enable; + + line_time_in_us = (htotal / pclk_freq_in_mhz); + swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub; + dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub; + swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub; + dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub; + + meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub; + meta_chunks_per_row_ub_c = rq_dlg_param.rq_c.meta_chunks_per_row_ub; + vupdate_offset = dst->vupdate_offset; + vupdate_width = dst->vupdate_width; + vready_offset = dst->vready_offset; + + dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; + dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; + + if (scl_enable) + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; + else + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; + + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; + + if (dout->dsc_enable) { + double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dispclk_delay_subtotal += dsc_delay; + } + + pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz + + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz; + + vstartup_start = dst->vstartup_start; + if (interlaced) { + if (vstartup_start / 2.0 + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal + <= vblank_end / 2.0) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } else { + if (vstartup_start + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal + <= vblank_end) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } + + // TODO: Where is this coming from? + if (interlaced) + vstartup_start = vstartup_start / 2; + + // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp? + if (vstartup_start >= min_vblank) { + dml_print( + "WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n", + __func__, + vblank_start, + vblank_end); + dml_print( + "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", + __func__, + vstartup_start, + min_vblank); + min_vblank = vstartup_start + 1; + dml_print( + "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", + __func__, + vstartup_start, + min_vblank); + } + + dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); + dml_print( + "DML_DLG: %s: pixel_rate_delay_subtotal = %d\n", + __func__, + pixel_rate_delay_subtotal); + dml_print( + "DML_DLG: %s: dst_x_after_scaler = %d\n", + __func__, + dst_x_after_scaler); + dml_print( + "DML_DLG: %s: dst_y_after_scaler = %d\n", + __func__, + dst_y_after_scaler); + + // Lwait + // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us? + line_wait = mode_lib->soc.urgent_latency_pixel_data_only_us; + if (cstate_en) + line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait); + if (pstate_en) + line_wait = dml_max( + mode_lib->soc.dram_clock_change_latency_us + + mode_lib->soc.urgent_latency_pixel_data_only_us, // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us? + line_wait); + line_wait = line_wait / line_time_in_us; + + dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); + + dst_y_per_vm_vblank = get_dst_y_per_vm_vblank( + mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + dst_y_per_row_vblank = get_dst_y_per_row_vblank( + mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + max_dst_y_per_vm_vblank = 32.0; + max_dst_y_per_row_vblank = 16.0; + + // magic! + if (htotal <= 75) { + min_vblank = 300; + max_dst_y_per_vm_vblank = 100.0; + max_dst_y_per_row_vblank = 100.0; + } + + dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip); + dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip); + dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); + + ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); + ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank); + + ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); + lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank); + + dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw); + + vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l); + dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c); + + // Active + req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub; + req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub; + meta_row_height_l = rq_dlg_param.rq_l.meta_row_height; + meta_row_height_c = rq_dlg_param.rq_c.meta_row_height; + swath_width_pixels_ub_l = 0; + swath_width_pixels_ub_c = 0; + scaler_rec_in_width_l = 0; + scaler_rec_in_width_c = 0; + dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height; + dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height; + + if (mode_422) { + swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element + swath_width_pixels_ub_c = swath_width_ub_c * 2; + } else { + swath_width_pixels_ub_l = swath_width_ub_l * 1; + swath_width_pixels_ub_c = swath_width_ub_c * 1; + } + + hscale_pixel_rate_l = 0.; + hscale_pixel_rate_c = 0.; + min_hratio_fact_l = 1.0; + min_hratio_fact_c = 1.0; + + if (htaps_l <= 1) + min_hratio_fact_l = 2.0; + else if (htaps_l <= 6) { + if ((hratio_l * 2.0) > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l * 2.0; + } else { + if (hratio_l > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l; + } + + hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; + + if (htaps_c <= 1) + min_hratio_fact_c = 2.0; + else if (htaps_c <= 6) { + if ((hratio_c * 2.0) > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c * 2.0; + } else { + if (hratio_c > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c; + } + + hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; + + refcyc_per_line_delivery_pre_l = 0.; + refcyc_per_line_delivery_pre_c = 0.; + refcyc_per_line_delivery_l = 0.; + refcyc_per_line_delivery_c = 0.; + + refcyc_per_req_delivery_pre_l = 0.; + refcyc_per_req_delivery_pre_c = 0.; + refcyc_per_req_delivery_l = 0.; + refcyc_per_req_delivery_c = 0.; + + full_recout_width = 0; + // In ODM + if (src->is_hsplit) { + // This "hack" is only allowed (and valid) for MPC combine. In ODM + // combine, you MUST specify the full_recout_width...according to Oswin + if (dst->full_recout_width == 0 && !dst->odm_combine) { + dml_print( + "DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", + __func__); + full_recout_width = dst->recout_width * 2; // assume half split for dcn1 + } else + full_recout_width = dst->full_recout_width; + } else + full_recout_width = dst->recout_width; + + // As of DCN2, mpc_combine and odm_combine are mutually exclusive + refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + refcyc_per_line_delivery_l = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + dml_print("DML_DLG: %s: full_recout_width = %d\n", __func__, full_recout_width); + dml_print( + "DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", + __func__, + hscale_pixel_rate_l); + dml_print( + "DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", + __func__, + refcyc_per_line_delivery_pre_l); + dml_print( + "DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", + __func__, + refcyc_per_line_delivery_l); + + if (dual_plane) { + refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + refcyc_per_line_delivery_c = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + dml_print( + "DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", + __func__, + refcyc_per_line_delivery_pre_c); + dml_print( + "DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", + __func__, + refcyc_per_line_delivery_c); + } + + // TTU - Luma / Chroma + if (access_dir) { // vertical access + scaler_rec_in_width_l = vp_height_l; + scaler_rec_in_width_c = vp_height_c; + } else { + scaler_rec_in_width_l = vp_width_l; + scaler_rec_in_width_c = vp_width_c; + } + + refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + refcyc_per_req_delivery_l = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + + dml_print( + "DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", + __func__, + refcyc_per_req_delivery_pre_l); + dml_print( + "DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", + __func__, + refcyc_per_req_delivery_l); + + ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); + + if (dual_plane) { + refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + refcyc_per_req_delivery_c = get_refcyc_per_delivery( + mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + + dml_print( + "DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", + __func__, + refcyc_per_req_delivery_pre_c); + dml_print( + "DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", + __func__, + refcyc_per_req_delivery_c); + + ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); + } + + // XFC + xfc_transfer_delay = get_xfc_transfer_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + xfc_precharge_delay = get_xfc_precharge_delay( + mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + xfc_remote_surface_flip_latency = get_xfc_remote_surface_flip_latency( + mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + xfc_dst_y_delta_drq_limit = xfc_remote_surface_flip_latency; + xfc_prefetch_margin = get_xfc_prefetch_margin( + mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + + // TTU - Cursor + refcyc_per_req_delivery_pre_cur0 = 0.0; + refcyc_per_req_delivery_cur0 = 0.0; + if (src->num_cursors > 0) { + calculate_ttu_cursor( + mode_lib, + &refcyc_per_req_delivery_pre_cur0, + &refcyc_per_req_delivery_cur0, + refclk_freq_in_mhz, + ref_freq_to_pix_freq, + hscale_pixel_rate_l, + scl->hscl_ratio, + vratio_pre_l, + vratio_l, + src->cur0_src_width, + (enum cursor_bpp) (src->cur0_bpp)); + } + + refcyc_per_req_delivery_pre_cur1 = 0.0; + refcyc_per_req_delivery_cur1 = 0.0; + if (src->num_cursors > 1) { + calculate_ttu_cursor( + mode_lib, + &refcyc_per_req_delivery_pre_cur1, + &refcyc_per_req_delivery_cur1, + refclk_freq_in_mhz, + ref_freq_to_pix_freq, + hscale_pixel_rate_l, + scl->hscl_ratio, + vratio_pre_l, + vratio_l, + src->cur1_src_width, + (enum cursor_bpp) (src->cur1_bpp)); + } + + // TTU - Misc + // all hard-coded + + // Assignment to register structures + disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line + disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk + ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13)); + disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); + + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + + disp_dlg_regs->refcyc_per_pte_group_vblank_l = + (unsigned int) (dst_y_per_row_vblank * (double) htotal + * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13)); + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank + * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_c); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c + < (unsigned int)dml_pow(2, 13)); + } + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = + (unsigned int) (dst_y_per_row_vblank * (double) htotal + * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now + + disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal + * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l; + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal + * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip + * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c; + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip + * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c; + } + + disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; + disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; + disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;; + disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;; + + // Clamp to max for now + if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1; + + if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1; + + if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1; + + if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1; + disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l + / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17)); + + if (dual_plane) { + disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c + / (double) vratio_c * dml_pow(2, 2)); + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { + dml_print( + "DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n", + __func__, + disp_dlg_regs->dst_y_per_pte_row_nom_c, + (unsigned int)dml_pow(2, 17) - 1); + } + } + + disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l + / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17)); + + disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now + + dml_print( + "DML: Trow: %fus\n", + line_time_in_us * (double)dpte_row_height_l / (double)vratio_l); + + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_nom_c = + (unsigned int) ((double) dpte_row_height_c / (double) vratio_c + * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; + + // TODO: Is this the right calculation? Does htotal need to be halved? + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = + (unsigned int) ((double) meta_row_height_c / (double) vratio_c + * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; + } + + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor( + refcyc_per_line_delivery_pre_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor( + refcyc_per_line_delivery_l, 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor( + refcyc_per_line_delivery_pre_c, 1); + disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor( + refcyc_per_line_delivery_c, 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13)); + + disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; + disp_dlg_regs->dst_y_offset_cur0 = 0; + disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; + disp_dlg_regs->dst_y_offset_cur1 = 0; + + disp_dlg_regs->xfc_reg_transfer_delay = xfc_transfer_delay; + disp_dlg_regs->xfc_reg_precharge_delay = xfc_precharge_delay; + disp_dlg_regs->xfc_reg_remote_surface_flip_latency = xfc_remote_surface_flip_latency; + disp_dlg_regs->xfc_reg_prefetch_margin = dml_ceil( + xfc_prefetch_margin * refclk_freq_in_mhz, 1); + + // slave has to have this value also set to off + if (src->xfc_enable && !src->xfc_slave) + disp_dlg_regs->dst_y_delta_drq_limit = dml_ceil(xfc_dst_y_delta_drq_limit, 1); + else + disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = + (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = + (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 + * dml_pow(2, 10)); + disp_ttu_regs->qos_level_low_wm = 0; + ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14)); + disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal + * ref_freq_to_pix_freq); + ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14)); + + disp_ttu_regs->qos_level_flip = 14; + disp_ttu_regs->qos_level_fixed_l = 8; + disp_ttu_regs->qos_level_fixed_c = 8; + disp_ttu_regs->qos_level_fixed_cur0 = 8; + disp_ttu_regs->qos_ramp_disable_l = 0; + disp_ttu_regs->qos_ramp_disable_c = 0; + disp_ttu_regs->qos_ramp_disable_cur0 = 0; + + disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; + ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24)); + + print__ttu_regs_st(mode_lib, *disp_ttu_regs); + print__dlg_regs_st(mode_lib, *disp_dlg_regs); +} + +void dml21_rq_dlg_get_dlg_reg( + struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support) +{ + display_rq_params_st rq_param = {0}; + display_dlg_sys_params_st dlg_sys_param = {0}; + + // Get watermark and Tex. + dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep( + mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw( + mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes( + mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency + / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated + + print__dlg_sys_params_st(mode_lib, dlg_sys_param); + + // system parameter calculation done + + dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx); + dml_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe); + dml_rq_dlg_get_dlg_params( + mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx, + dlg_regs, + ttu_regs, + rq_param.dlg, + dlg_sys_param, + cstate_en, + pstate_en); + dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); +} + +void dml_rq_dlg_get_arb_params(struct display_mode_lib *mode_lib, display_arb_params_st *arb_param) +{ + memset(arb_param, 0, sizeof(*arb_param)); + arb_param->max_req_outstanding = 256; + arb_param->min_req_outstanding = 68; + arb_param->sat_level_us = 60; +} + +static void calculate_ttu_cursor( + struct display_mode_lib *mode_lib, + double *refcyc_per_req_delivery_pre_cur, + double *refcyc_per_req_delivery_cur, + double refclk_freq_in_mhz, + double ref_freq_to_pix_freq, + double hscale_pixel_rate_l, + double hscl_ratio, + double vratio_pre_l, + double vratio_l, + unsigned int cur_width, + enum cursor_bpp cur_bpp) +{ + unsigned int cur_src_width = cur_width; + unsigned int cur_req_size = 0; + unsigned int cur_req_width = 0; + double cur_width_ub = 0.0; + double cur_req_per_width = 0.0; + double hactive_cur = 0.0; + + ASSERT(cur_src_width <= 256); + + *refcyc_per_req_delivery_pre_cur = 0.0; + *refcyc_per_req_delivery_cur = 0.0; + if (cur_src_width > 0) { + unsigned int cur_bit_per_pixel = 0; + + if (cur_bpp == dm_cur_2bit) { + cur_req_size = 64; // byte + cur_bit_per_pixel = 2; + } else { // 32bit + cur_bit_per_pixel = 32; + if (cur_src_width >= 1 && cur_src_width <= 16) + cur_req_size = 64; + else if (cur_src_width >= 17 && cur_src_width <= 31) + cur_req_size = 128; + else + cur_req_size = 256; + } + + cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0); + cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) + * (double) cur_req_width; + cur_req_per_width = cur_width_ub / (double) cur_req_width; + hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor + + if (vratio_pre_l <= 1.0) { + *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq + / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz + * (double) cur_src_width / hscale_pixel_rate_l + / (double) cur_req_per_width; + } + + ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13)); + + if (vratio_l <= 1.0) { + *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq + / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz + * (double) cur_src_width / hscale_pixel_rate_l + / (double) cur_req_per_width; + } + + dml_print( + "DML_DLG: %s: cur_req_width = %d\n", + __func__, + cur_req_width); + dml_print( + "DML_DLG: %s: cur_width_ub = %3.2f\n", + __func__, + cur_width_ub); + dml_print( + "DML_DLG: %s: cur_req_per_width = %3.2f\n", + __func__, + cur_req_per_width); + dml_print( + "DML_DLG: %s: hactive_cur = %3.2f\n", + __func__, + hactive_cur); + dml_print( + "DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", + __func__, + *refcyc_per_req_delivery_pre_cur); + dml_print( + "DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", + __func__, + *refcyc_per_req_delivery_cur); + + ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13)); + } +} + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h new file mode 100644 index 000000000000..83e95f8cbff2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h @@ -0,0 +1,73 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML21_DISPLAY_RQ_DLG_CALC_H__ +#define __DML21_DISPLAY_RQ_DLG_CALC_H__ + +#include "../dml_common_defs.h" +#include "../display_rq_dlg_helpers.h" + +struct display_mode_lib; + + +// Function: dml_rq_dlg_get_rq_reg +// Main entry point for test to get the register values out of this DML class. +// This function calls <get_rq_param> and <extract_rq_regs> functions to calculate +// and then populate the rq_regs struct +// Input: +// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) +// Output: +// rq_regs - struct that holds all the RQ registers field value. +// See also: <display_rq_regs_st> +void dml21_rq_dlg_get_rq_reg( + struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_pipe_params_st pipe_param); + +// Function: dml_rq_dlg_get_dlg_reg +// Calculate and return DLG and TTU register struct given the system setting +// Output: +// dlg_regs - output DLG register struct +// ttu_regs - output DLG TTU register struct +// Input: +// e2e_pipe_param - "compacted" array of e2e pipe param struct +// num_pipes - num of active "pipe" or "route" +// pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg +// cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered. +// Added for legacy or unrealistic timing tests. +void dml21_rq_dlg_get_dlg_reg( + struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h index 0c2fab1e93b6..1c97083b8d0b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h @@ -37,11 +37,14 @@ enum source_format_class { dm_444_64 = 2, dm_420_8 = 3, dm_420_10 = 4, - dm_422_8 = 5, - dm_422_10 = 6, - dm_444_8 = 7, + dm_420_12 = 5, + dm_422_8 = 6, + dm_422_10 = 7, + dm_444_8 = 8, dm_mono_8 = dm_444_8, - dm_mono_16 = dm_444_16 + dm_mono_16 = dm_444_16, + dm_rgbe = 9, + dm_rgbe_alpha = 10, }; enum output_bpc_class { dm_out_6 = 0, dm_out_8 = 1, dm_out_10 = 2, dm_out_12 = 3, dm_out_16 = 4 @@ -83,7 +86,7 @@ enum dm_swizzle_mode { dm_sw_var_d_x = 30, dm_sw_64kb_r_x, dm_sw_gfx7_2d_thin_lvp, - dm_sw_gfx7_2d_thin_gl + dm_sw_gfx7_2d_thin_gl, }; enum lb_depth { dm_lb_10 = 0, dm_lb_8 = 1, dm_lb_6 = 2, dm_lb_12 = 3, dm_lb_16 = 4, @@ -112,7 +115,8 @@ enum output_standard { enum mpc_combine_affinity { dm_mpc_always_when_possible, dm_mpc_reduce_voltage, - dm_mpc_reduce_voltage_and_clocks + dm_mpc_reduce_voltage_and_clocks, + dm_mpc_never }; enum self_refresh_affinity { @@ -157,4 +161,10 @@ enum writeback_config { dm_whole_buffer_for_single_stream_interleave, }; +enum odm_combine_mode { + dm_odm_combine_mode_disabled, + dm_odm_combine_mode_2to1, + dm_odm_combine_mode_4to1, +}; + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c index 91810c7d5cf5..704efefdcba8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c @@ -28,6 +28,12 @@ #if defined(CONFIG_DRM_AMD_DC_DCN2_0) #include "dcn20/display_mode_vba_20.h" #include "dcn20/display_rq_dlg_calc_20.h" +#include "dcn20/display_mode_vba_20v2.h" +#include "dcn20/display_rq_dlg_calc_20v2.h" +#endif +#ifdef CONFIG_DRM_AMD_DC_DCN2_1 +#include "dcn21/display_mode_vba_21.h" +#include "dcn21/display_rq_dlg_calc_21.h" #endif #if defined(CONFIG_DRM_AMD_DC_DCN2_0) @@ -37,6 +43,22 @@ const struct dml_funcs dml20_funcs = { .rq_dlg_get_dlg_reg = dml20_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml20_rq_dlg_get_rq_reg }; + +const struct dml_funcs dml20v2_funcs = { + .validate = dml20v2_ModeSupportAndSystemConfigurationFull, + .recalculate = dml20v2_recalculate, + .rq_dlg_get_dlg_reg = dml20v2_rq_dlg_get_dlg_reg, + .rq_dlg_get_rq_reg = dml20v2_rq_dlg_get_rq_reg +}; +#endif + +#ifdef CONFIG_DRM_AMD_DC_DCN2_1 +const struct dml_funcs dml21_funcs = { + .validate = dml21_ModeSupportAndSystemConfigurationFull, + .recalculate = dml21_recalculate, + .rq_dlg_get_dlg_reg = dml21_rq_dlg_get_dlg_reg, + .rq_dlg_get_rq_reg = dml21_rq_dlg_get_rq_reg +}; #endif void dml_init_instance(struct display_mode_lib *lib, @@ -52,7 +74,16 @@ void dml_init_instance(struct display_mode_lib *lib, case DML_PROJECT_NAVI10: lib->funcs = dml20_funcs; break; + case DML_PROJECT_NAVI10v2: + lib->funcs = dml20v2_funcs; + break; #endif +#ifdef CONFIG_DRM_AMD_DC_DCN2_1 + case DML_PROJECT_DCN21: + lib->funcs = dml21_funcs; + break; +#endif + default: break; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h index 5bf13d67f289..d8c59aa356b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h @@ -36,6 +36,10 @@ enum dml_project { DML_PROJECT_RAVEN1, #ifdef CONFIG_DRM_AMD_DC_DCN2_0 DML_PROJECT_NAVI10, + DML_PROJECT_NAVI10v2, +#endif +#ifdef CONFIG_DRM_AMD_DC_DCN2_1 + DML_PROJECT_DCN21, #endif }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 5678472546ab..f4c1ef9046bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -100,6 +100,7 @@ struct _vcs_dpi_soc_bounding_box_st { unsigned int vmm_page_size_bytes; unsigned int hostvm_min_page_size_bytes; double dram_clock_change_latency_us; + double dummy_pstate_latency_us; double writeback_dram_clock_change_latency_us; unsigned int return_bus_width_bytes; unsigned int voltage_override; @@ -108,6 +109,9 @@ struct _vcs_dpi_soc_bounding_box_st { int use_urgent_burst_bw; unsigned int num_states; struct _vcs_dpi_voltage_scaling_st clock_limits[MAX_CLOCK_LIMIT_STATES]; + bool do_urgent_latency_adjustment; + double urgent_latency_adjustment_fabric_clock_component_us; + double urgent_latency_adjustment_fabric_clock_reference_mhz; }; struct _vcs_dpi_ip_params_st { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 4d2a1262d9db..65cf4edddaff 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -262,6 +262,13 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib) //mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mhz; mode_lib->vba.MaxDispclk[i] = soc->clock_limits[i].dispclk_mhz; } + + mode_lib->vba.DoUrgentLatencyAdjustment = + soc->do_urgent_latency_adjustment; + mode_lib->vba.UrgentLatencyAdjustmentFabricClockComponent = + soc->urgent_latency_adjustment_fabric_clock_component_us; + mode_lib->vba.UrgentLatencyAdjustmentFabricClockReference = + soc->urgent_latency_adjustment_fabric_clock_reference_mhz; } static void fetch_ip_params(struct display_mode_lib *mode_lib) @@ -385,8 +392,10 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) src->viewport_y_c; mode_lib->vba.PitchY[mode_lib->vba.NumberOfActivePlanes] = src->data_pitch; mode_lib->vba.SurfaceHeightY[mode_lib->vba.NumberOfActivePlanes] = src->viewport_height; + mode_lib->vba.SurfaceWidthY[mode_lib->vba.NumberOfActivePlanes] = src->viewport_width; mode_lib->vba.PitchC[mode_lib->vba.NumberOfActivePlanes] = src->data_pitch_c; mode_lib->vba.SurfaceHeightC[mode_lib->vba.NumberOfActivePlanes] = src->viewport_height_c; + mode_lib->vba.SurfaceWidthC[mode_lib->vba.NumberOfActivePlanes] = src->viewport_width_c; mode_lib->vba.DCCMetaPitchY[mode_lib->vba.NumberOfActivePlanes] = src->meta_pitch; mode_lib->vba.DCCMetaPitchC[mode_lib->vba.NumberOfActivePlanes] = src->meta_pitch_c; mode_lib->vba.HRatio[mode_lib->vba.NumberOfActivePlanes] = scl->hscl_ratio; @@ -457,6 +466,10 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) dout->wb.wb_dst_width; mode_lib->vba.WritebackDestinationHeight[mode_lib->vba.NumberOfActivePlanes] = dout->wb.wb_dst_height; + mode_lib->vba.WritebackHRatio[mode_lib->vba.NumberOfActivePlanes] = + dout->wb.wb_hratio; + mode_lib->vba.WritebackVRatio[mode_lib->vba.NumberOfActivePlanes] = + dout->wb.wb_vratio; mode_lib->vba.WritebackPixelFormat[mode_lib->vba.NumberOfActivePlanes] = (enum source_format_class) (dout->wb.wb_pixel_format); mode_lib->vba.WritebackHTaps[mode_lib->vba.NumberOfActivePlanes] = @@ -568,6 +581,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) if (src->is_hsplit) { for (k = j + 1; k < mode_lib->vba.cache_num_pipes; ++k) { display_pipe_source_params_st *src_k = &pipes[k].pipe.src; + display_pipe_dest_params_st *dst_k = &pipes[k].pipe.dest; if (src_k->is_hsplit && !visited[k] && src->hsplit_grp == src_k->hsplit_grp) { @@ -575,12 +589,15 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.NumberOfActivePlanes; mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes]++; if (mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes] - == dm_horz) + == dm_horz) { mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] += src_k->viewport_width; - else + mode_lib->vba.ScalerRecoutWidth[mode_lib->vba.NumberOfActivePlanes] += + dst_k->recout_width; + } else { mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] += src_k->viewport_height; + } visited[k] = true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 0347f74cda3a..91decac50557 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -290,6 +290,7 @@ struct vba_vars_st { double PixelClock[DC__NUM_DPP__MAX]; double PixelClockBackEnd[DC__NUM_DPP__MAX]; bool DCCEnable[DC__NUM_DPP__MAX]; + bool FECEnable[DC__NUM_DPP__MAX]; unsigned int DCCMetaPitchY[DC__NUM_DPP__MAX]; unsigned int DCCMetaPitchC[DC__NUM_DPP__MAX]; enum scan_direction_class SourceScan[DC__NUM_DPP__MAX]; @@ -317,6 +318,7 @@ struct vba_vars_st { double DCCRate[DC__NUM_DPP__MAX]; double AverageDCCCompressionRate; bool ODMCombineEnabled[DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineTypeEnabled[DC__NUM_DPP__MAX]; double OutputBpp[DC__NUM_DPP__MAX]; bool DSCEnabled[DC__NUM_DPP__MAX]; unsigned int DSCInputBitPerComponent[DC__NUM_DPP__MAX]; @@ -395,6 +397,7 @@ struct vba_vars_st { double FabricClockPerState[DC__VOLTAGE_STATES + 1]; double SOCCLKPerState[DC__VOLTAGE_STATES + 1]; double PHYCLKPerState[DC__VOLTAGE_STATES + 1]; + double DTBCLKPerState[DC__VOLTAGE_STATES + 1]; double MaxDppclk[DC__VOLTAGE_STATES + 1]; double MaxDSCCLK[DC__VOLTAGE_STATES + 1]; double DRAMSpeedPerState[DC__VOLTAGE_STATES + 1]; @@ -488,6 +491,7 @@ struct vba_vars_st { unsigned int NoOfDPP[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; int NoOfDPPThisState[DC__NUM_DPP__MAX]; bool ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineTypeEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; unsigned int SwathWidthYThisState[DC__NUM_DPP__MAX]; unsigned int SwathHeightCPerState[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; unsigned int SwathHeightYThisState[DC__NUM_DPP__MAX]; @@ -513,6 +517,7 @@ struct vba_vars_st { bool DIOSupport[DC__VOLTAGE_STATES + 1]; bool NotEnoughDSCUnits[DC__VOLTAGE_STATES + 1]; bool DSCCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1]; + bool DTBCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1]; double UrgentRoundTripAndOutOfOrderLatencyPerState[DC__VOLTAGE_STATES + 1]; bool ROBSupport[DC__VOLTAGE_STATES + 1]; bool PTEBufferSizeNotExceeded[DC__VOLTAGE_STATES + 1][2]; @@ -605,6 +610,7 @@ struct vba_vars_st { double MaximumSwathWidthLuma[DC__NUM_DPP__MAX]; double MaximumSwathWidthChroma[DC__NUM_DPP__MAX]; bool odm_combine_dummy[DC__NUM_DPP__MAX]; + enum odm_combine_mode odm_combine_mode_dummy[DC__NUM_DPP__MAX]; double dummy1[DC__NUM_DPP__MAX]; double dummy2[DC__NUM_DPP__MAX]; double dummy3[DC__NUM_DPP__MAX]; @@ -625,6 +631,11 @@ struct vba_vars_st { unsigned int dummyinteger10; unsigned int dummyinteger11; unsigned int dummyinteger12; + unsigned int dummyintegerarr1[DC__NUM_DPP__MAX]; + unsigned int dummyintegerarr2[DC__NUM_DPP__MAX]; + unsigned int dummyintegerarr3[DC__NUM_DPP__MAX]; + unsigned int dummyintegerarr4[DC__NUM_DPP__MAX]; + long dummylongarr1[DC__NUM_DPP__MAX]; bool dummysinglestring; bool SingleDPPViewportSizeSupportPerPlane[DC__NUM_DPP__MAX]; double PlaneRequiredDISPCLKWithODMCombine2To1; @@ -633,6 +644,7 @@ struct vba_vars_st { bool LinkDSCEnable; bool ODMCombine4To1SupportCheckOK[DC__VOLTAGE_STATES + 1]; bool ODMCombineEnableThisState[DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineEnableTypeThisState[DC__NUM_DPP__MAX]; unsigned int SwathWidthCThisState[DC__NUM_DPP__MAX]; bool ViewportSizeSupportPerPlane[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitchY[DC__NUM_DPP__MAX]; @@ -641,6 +653,7 @@ struct vba_vars_st { unsigned int NotEnoughUrgentLatencyHiding; unsigned int NotEnoughUrgentLatencyHidingPre; long PTEBufferSizeInRequestsForLuma; + long PTEBufferSizeInRequestsForChroma; // Missing from VBA long dpte_group_bytes_chroma; @@ -787,6 +800,9 @@ struct vba_vars_st { unsigned int PDEProcessingBufIn64KBReqs; double MaxTotalVActiveRDBandwidth; + bool DoUrgentLatencyAdjustment; + double UrgentLatencyAdjustmentFabricClockComponent; + double UrgentLatencyAdjustmentFabricClockReference; double MinUrgentLatencySupportUs; double MinFullDETBufferingTime; double AverageReadBandwidthGBytePerSecond; @@ -801,6 +817,8 @@ struct vba_vars_st { bool ModeIsSupported; bool ODMCombine4To1Supported; + unsigned int SurfaceWidthY[DC__NUM_DPP__MAX]; + unsigned int SurfaceWidthC[DC__NUM_DPP__MAX]; unsigned int SurfaceHeightY[DC__NUM_DPP__MAX]; unsigned int SurfaceHeightC[DC__NUM_DPP__MAX]; unsigned int WritebackHTaps[DC__NUM_DPP__MAX]; |