You can subscribe to this list here.
2006 | Jan | Feb | Mar | Apr (102) | May (78) | Jun (70) | Jul (46) | Aug | Sep (2) | Oct (59) | Nov (84) | Dec (41) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2007 | Jan (401) | Feb (151) | Mar (38) | Apr (43) | May (77) | Jun (52) | Jul (65) | Aug (33) | Sep (15) | Oct (14) | Nov (9) | Dec (4) |
2008 | Jan | Feb (15) | Mar (7) | Apr (41) | May (16) | Jun | Jul (8) | Aug (43) | Sep (111) | Oct (58) | Nov (93) | Dec (185) |
2009 | Jan (221) | Feb (150) | Mar (76) | Apr (250) | May (242) | Jun (182) | Jul (232) | Aug (101) | Sep (121) | Oct (78) | Nov (110) | Dec (155) |
2010 | Jan (67) | Feb (57) | Mar (72) | Apr (140) | May (54) | Jun (35) | Jul (39) | Aug (30) | Sep (35) | Oct (46) | Nov (34) | Dec (29) |
2011 | Jan (15) | Feb (22) | Mar (23) | Apr (37) | May (21) | Jun (29) | Jul (23) | Aug (19) | Sep (9) | Oct (18) | Nov (17) | Dec (5) |
2012 | Jan (34) | Feb (18) | Mar (37) | Apr (34) | May (24) | Jun (10) | Jul (42) | Aug (55) | Sep (9) | Oct (9) | Nov (5) | Dec (34) |
2013 | Jan (41) | Feb (53) | Mar (12) | Apr | May | Jun | Jul | Aug | Sep (8) | Oct (34) | Nov (107) | Dec (28) |
2014 | Jan (15) | Feb (33) | Mar (28) | Apr (8) | May (3) | Jun (1) | Jul (2) | Aug (3) | Sep (6) | Oct | Nov (4) | Dec (2) |
2015 | Jan (29) | Feb (17) | Mar (44) | Apr (28) | May (16) | Jun (18) | Jul (18) | Aug (23) | Sep (39) | Oct (25) | Nov (5) | Dec (2) |
2016 | Jan (13) | Feb (33) | Mar (58) | Apr (12) | May (5) | Jun (32) | Jul (43) | Aug (33) | Sep (10) | Oct (4) | Nov (10) | Dec (1) |
From: <ze...@us...> - 2016-08-13 18:24:48
|
Revision: 5528 http://sourceforge.net/p/desmume/code/5528 Author: zeromus Date: 2016-08-13 18:24:45 +0000 (Sat, 13 Aug 2016) Log Message: ----------- winport: fix fastbuild flag Modified Paths: -------------- trunk/desmume/src/windows/desmume.props Modified: trunk/desmume/src/windows/desmume.props =================================================================== --- trunk/desmume/src/windows/desmume.props 2016-08-13 18:24:33 UTC (rev 5527) +++ trunk/desmume/src/windows/desmume.props 2016-08-13 18:24:45 UTC (rev 5528) @@ -94,7 +94,7 @@ <!-- BETA_VERSION ? --> <PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'Debug'">_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'Release'">RELEASE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> - <PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'FastBuild'">RELEASE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'FastBuild'">FASTBUILD;RELEASE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> <!-- These work together --> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-08-13 18:24:34
|
Revision: 5527 http://sourceforge.net/p/desmume/code/5527 Author: zeromus Date: 2016-08-13 18:24:33 +0000 (Sat, 13 Aug 2016) Log Message: ----------- change backup memory whitelist application technique and fix SM64 (KOR) which needs an 0.5KB eeprom apparently (based on its use of WRHI and RDHI commands) Modified Paths: -------------- trunk/desmume/src/mc.cpp Modified: trunk/desmume/src/mc.cpp =================================================================== --- trunk/desmume/src/mc.cpp 2016-08-08 03:10:13 UTC (rev 5526) +++ trunk/desmume/src/mc.cpp 2016-08-13 18:24:33 UTC (rev 5527) @@ -1,7 +1,7 @@ /* Copyright (C) 2006 thoduv Copyright (C) 2006-2007 Theo Berkau - Copyright (C) 2008-2015 DeSmuME team + Copyright (C) 2008-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -618,6 +618,21 @@ ensure((u32)savesize); //expand properly if necessary addr_size = addr_size_for_old_save_type(savetype); } + + //automatically detect these hardcodes + if(state == DETECTING) + { + if(!memcmp(gameInfo.header.gameCode,"ASMK", 4)) addr_size = 1; //super mario 64 ds (KOR, which is different somehow) + else if(!memcmp(gameInfo.header.gameCode,"ASM", 3)) addr_size = 2; //super mario 64 ds + else if(!memcmp(gameInfo.header.gameCode,"BDE", 3)) addr_size = 2; // Dementium II + else if(!memcmp(gameInfo.header.gameCode,"AL3", 3)) addr_size = 1; //spongebob atlantis squarepantis. + else if(!memcmp(gameInfo.header.gameCode,"AH5", 3)) addr_size = 1; //over the hedge + else if(!memcmp(gameInfo.header.gameCode,"AVH", 3)) addr_size = 1; //over the hedge - Hammy Goes Nuts! 
+ else if(!memcmp(gameInfo.header.gameCode,"AQ3", 3)) addr_size = 1; //spider-man 3 + + //if we found a whitelist match, we dont need to run detection + if(addr_size) state = RUNNING; + } } void BackupDevice::close_rom() @@ -661,36 +676,33 @@ addr_size = 1; //choose 1 just to keep the busted savefile from growing too big msgbox->error("Catastrophic error while autodetecting save type.\nIt will need to be specified manually\n"); break; + case 2: //the modern typical case for small eeproms addr_size = 1; break; + case 3: //another modern typical case.. //but unfortunately we select this case on accident sometimes when what it meant to do was present the archaic 1+2 case //(the archaic 1+2 case is: specifying one address byte, and then reading the first two bytes, instead of the first one byte, as most other games would do.) //so, we're gonna hack in checks for the games that are doing this addr_size = 2; + break; - // TODO: will study a deep, why this happens (wrong detect size) - if(!memcmp(gameInfo.header.gameCode,"AL3", 3)) addr_size = 1; //spongebob atlantis squarepantis. - if(!memcmp(gameInfo.header.gameCode,"AH5", 3)) addr_size = 1; //over the hedge - if(!memcmp(gameInfo.header.gameCode,"AVH", 3)) addr_size = 1; //over the hedge - Hammy Goes Nuts! - if(!memcmp(gameInfo.header.gameCode,"AQ3", 3)) addr_size = 1; //spider-man 3 - - break; case 4: //a modern typical case addr_size = 3; - if(!memcmp(gameInfo.header.gameCode,"ASM", 3)) addr_size = 2; //super mario 64 ds + break; default: //the archaic case: write the address and then some modulo-4 number of bytes //why modulo 4? who knows. 
- //SM64 (KOR) makes it here with autodetect_size=11 and nothing interesting in the buffer addr_size = autodetect_size & 3; - if(!memcmp(gameInfo.header.gameCode,"BDE", 3)) addr_size = 2; // Dementium II + //SM64 (KOR) makes it here with autodetect_size=11 and nothing interesting in the buffer + //we whitelisted it earlier though + break; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-08-08 03:10:16
|
Revision: 5526 http://sourceforge.net/p/desmume/code/5526 Author: zeromus Date: 2016-08-08 03:10:13 +0000 (Mon, 08 Aug 2016) Log Message: ----------- winport: allow sizing window under "minimum size". Unclear why we wanted this functionality to begin with, but probably predated view>window size multiples for more easy scaling. Other benefit: stops messing up the viewport determination when magnification overshoots window size (i.e. fullscreening a massive 5x filter applied by idiots) Modified Paths: -------------- trunk/desmume/src/windows/CWindow.cpp Modified: trunk/desmume/src/windows/CWindow.cpp =================================================================== --- trunk/desmume/src/windows/CWindow.cpp 2016-08-07 00:26:31 UTC (rev 5525) +++ trunk/desmume/src/windows/CWindow.cpp 2016-08-08 03:10:13 UTC (rev 5526) @@ -621,7 +621,8 @@ ZeroMemory(&mbi, sizeof(mbi)); mbi.cbSize = sizeof(mbi); GetMenuBarInfo(hwnd, OBJID_MENU, 0, &mbi); - int menuHeight = (mbi.rcBar.bottom - mbi.rcBar.top + 1); + //int menuHeight = (mbi.rcBar.bottom - mbi.rcBar.top + 1); //zero 07-aug-2016 - why did I do this? it isn't normal in windows and in the case of no menu bar it was making a 1 instead of a 0 (r3184 in 2009) + int menuHeight = (mbi.rcBar.bottom - mbi.rcBar.top); rect->bottom -= cymenu; rect->bottom += menuHeight; @@ -656,9 +657,13 @@ _minWidth = adjr.right-adjr.left; _minHeight = adjr.bottom-adjr.top + tbheight; - /* Clamp the size to the minimum size (256x384) */ - rect->right = (rect->left + std::max(_minWidth, (int)(rect->right - rect->left))); - rect->bottom = (rect->top + std::max(_minHeight, (int)(rect->bottom - rect->top))); + //zero 07-aug-2016 - this was really old code. Maybe it isn't needed anymore + //it effectively let the content overshoot the window size. 
+ //really we simply should let the window not overshoot the content size (but that restriction is waived for fullscreen mode) + //SIDE EFFECT: window can now be shrunk under the minimum size, which people have been wanting to do anyway + ////Clamp the size to the minimum size (256x384) + //rect->right = (rect->left + std::max(_minWidth, (int)(rect->right - rect->left))); + //rect->bottom = (rect->top + std::max(_minHeight, (int)(rect->bottom - rect->top))); bool horizontalDrag = (wParam == WMSZ_LEFT) || (wParam == WMSZ_RIGHT); bool verticalDrag = (wParam == WMSZ_TOP) || (wParam == WMSZ_BOTTOM); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-08-07 00:26:34
|
Revision: 5525 http://sourceforge.net/p/desmume/code/5525 Author: rogerman Date: 2016-08-07 00:26:31 +0000 (Sun, 07 Aug 2016) Log Message: ----------- GPU: - Display capture blending functions now support RGB888 color format. (Related to r5433. This rework is still incomplete.) Revision Links: -------------- http://sourceforge.net/p/desmume/code/5433 Modified Paths: -------------- trunk/desmume/src/GPU.cpp trunk/desmume/src/GPU.h Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-08-06 04:09:57 UTC (rev 5524) +++ trunk/desmume/src/GPU.cpp 2016-08-07 00:26:31 UTC (rev 5525) @@ -5517,7 +5517,7 @@ } } -template<NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH> +template <NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH> void GPUEngineA::_RenderLine_DisplayCapture(const u16 l) { assert( (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH/2) || (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH) ); @@ -5708,11 +5708,11 @@ { if (this->isLineRenderNative[l]) { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); } else { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); } newCaptureLineNativeState = false; @@ -5727,11 +5727,11 @@ if (this->isLineRenderNative[l]) { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); } else { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, true>(srcA, srcB, 
cap_dst, CAPTURELENGTH, 1); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); } newCaptureLineNativeState = this->isLineRenderNative[l]; @@ -5743,7 +5743,7 @@ { if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] ) { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); newCaptureLineNativeState = false; } else @@ -5754,7 +5754,7 @@ this->_RenderLine_DispCapture_FIFOToBuffer(fifoLine); } - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); newCaptureLineNativeState = true; } } @@ -5762,7 +5762,7 @@ { if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] ) { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); } else { @@ -5772,7 +5772,7 @@ this->_RenderLine_DispCapture_FIFOToBuffer(fifoLine); } - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1); } newCaptureLineNativeState = false; @@ -5879,22 +5879,22 @@ { if (this->isLineRenderNative[l]) { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); + 
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); } else { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); } } else { if (this->isLineRenderNative[l]) { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); } else { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); } } } @@ -5904,22 +5904,22 @@ { if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] ) { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); } else { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); } } else { if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] ) { - 
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); } else { - this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); + this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount); } } } @@ -6244,7 +6244,7 @@ } #ifdef ENABLE_SSE2 -template<NDSColorFormat COLORFORMAT> +template <NDSColorFormat COLORFORMAT> __m128i GPUEngineA::_RenderLine_DispCapture_BlendFunc_SSE2(const __m128i &srcA, const __m128i &srcB, const __m128i &blendEVA, const __m128i &blendEVB) { #ifdef ENABLE_SSSE3 @@ -6356,8 +6356,8 @@ } #endif -template<bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB> -void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA, const u16 *srcB, u16 *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l) +template <NDSColorFormat OUTPUTFORMAT, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB> +void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l) { #ifdef ENABLE_SSE2 const __m128i blendEVA_vec128 = _mm_set1_epi16(blendEVA); @@ -6368,49 +6368,91 @@ size_t offset = _gpuDstToSrcIndex[_gpuDstLineIndex[l] * dispInfo.customWidth] - (l * GPU_FRAMEBUFFER_NATIVE_WIDTH); size_t i = 0; -#ifdef ENABLE_SSE2 - const size_t ssePixCount = length - (length % 8); - for (; i < ssePixCount; i += 8) + if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) { - __m128i srcA_vec128 = (!CAPTUREFROMNATIVESRCA) ? 
_mm_load_si128((__m128i *)(srcA + i)) : _mm_set_epi16(srcA[offset + i + 7], - srcA[offset + i + 6], - srcA[offset + i + 5], - srcA[offset + i + 4], - srcA[offset + i + 3], - srcA[offset + i + 2], - srcA[offset + i + 1], - srcA[offset + i + 0]); + const u32 *srcA_32 = (const u32 *)srcA; + const u32 *srcB_32 = (const u32 *)srcB; + FragmentColor *dst32 = (FragmentColor *)dst; - __m128i srcB_vec128 = (!CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB + i)) : _mm_set_epi16(srcB[offset + i + 7], - srcB[offset + i + 6], - srcB[offset + i + 5], - srcB[offset + i + 4], - srcB[offset + i + 3], - srcB[offset + i + 2], - srcB[offset + i + 1], - srcB[offset + i + 0]); +#ifdef ENABLE_SSE2 + const size_t ssePixCount = length - (length % 4); + for (; i < ssePixCount; i+=4) + { + __m128i srcA_vec128 = (!CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_32 + i)) : _mm_set_epi32(srcA_32[offset + i + 3], + srcA_32[offset + i + 2], + srcA_32[offset + i + 1], + srcA_32[offset + i + 0]); + + __m128i srcB_vec128 = (!CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB_32 + i)) : _mm_set_epi32(srcB_32[offset + i + 3], + srcB_32[offset + i + 2], + srcB_32[offset + i + 1], + srcB_32[offset + i + 0]); + + _mm_store_si128( (__m128i *)(dst32 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<OUTPUTFORMAT>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) ); + } +#endif - _mm_store_si128( (__m128i *)(dst + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) ); +#ifdef ENABLE_SSE2 +#pragma LOOPVECTORIZE_DISABLE +#endif + for (; i < length; i++) + { + const FragmentColor colorA = (!CAPTUREFROMNATIVESRCA) ? ((const FragmentColor *)srcA)[i] : ((const FragmentColor *)srcA)[offset + i]; + const FragmentColor colorB = (!CAPTUREFROMNATIVESRCB) ? 
((const FragmentColor *)srcB)[i] : ((const FragmentColor *)srcB)[offset + i]; + + ((FragmentColor *)dst)[i] = this->_RenderLine_DispCapture_BlendFunc<OUTPUTFORMAT>(colorA, colorB, blendEVA, blendEVB); + } } + else + { + const u16 *srcA_16 = (const u16 *)srcA; + const u16 *srcB_16 = (const u16 *)srcB; + u16 *dst16 = (u16 *)dst; + +#ifdef ENABLE_SSE2 + const size_t ssePixCount = length - (length % 8); + for (; i < ssePixCount; i+=8) + { + __m128i srcA_vec128 = (!CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_16 + i)) : _mm_set_epi16(srcA_16[offset + i + 7], + srcA_16[offset + i + 6], + srcA_16[offset + i + 5], + srcA_16[offset + i + 4], + srcA_16[offset + i + 3], + srcA_16[offset + i + 2], + srcA_16[offset + i + 1], + srcA_16[offset + i + 0]); + + __m128i srcB_vec128 = (!CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB_16 + i)) : _mm_set_epi16(srcB_16[offset + i + 7], + srcB_16[offset + i + 6], + srcB_16[offset + i + 5], + srcB_16[offset + i + 4], + srcB_16[offset + i + 3], + srcB_16[offset + i + 2], + srcB_16[offset + i + 1], + srcB_16[offset + i + 0]); + + _mm_store_si128( (__m128i *)(dst16 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) ); + } #endif - + #ifdef ENABLE_SSE2 #pragma LOOPVECTORIZE_DISABLE #endif - for (; i < length; i++) - { - const u16 colorA = (!CAPTUREFROMNATIVESRCA) ? srcA[i] : srcA[offset + i]; - const u16 colorB = (!CAPTUREFROMNATIVESRCB) ? srcB[i] : srcB[offset + i]; - - dst[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB); + for (; i < length; i++) + { + const u16 colorA = (!CAPTUREFROMNATIVESRCA) ? srcA_16[i] : srcA_16[offset + i]; + const u16 colorB = (!CAPTUREFROMNATIVESRCB) ? 
srcB_16[i] : srcB_16[offset + i]; + + dst16[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB); + } } } -template<size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST> -void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *srcA, const u16 *srcB, u16 *dst, const size_t captureLengthExt, const size_t l) +template <NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST> +void GPUEngineA::_RenderLine_DispCapture_Blend(const void *srcA, const void *srcB, void *dst, const size_t captureLengthExt, const size_t l) { - const u8 blendEVA = GPU->GetEngineMain()->_dispCapCnt.EVA; - const u8 blendEVB = GPU->GetEngineMain()->_dispCapCnt.EVB; + const u8 blendEVA = this->_dispCapCnt.EVA; + const u8 blendEVB = this->_dispCapCnt.EVB; if (CAPTURETONATIVEDST) { @@ -6418,55 +6460,93 @@ const __m128i blendEVA_vec128 = _mm_set1_epi16(blendEVA); const __m128i blendEVB_vec128 = _mm_set1_epi16(blendEVB); - for (size_t i = 0; i < CAPTURELENGTH; i += 8) + if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) { - __m128i srcA_vec128 = (CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA + i)) : _mm_set_epi16(srcA[_gpuDstPitchIndex[i+7]], - srcA[_gpuDstPitchIndex[i+6]], - srcA[_gpuDstPitchIndex[i+5]], - srcA[_gpuDstPitchIndex[i+4]], - srcA[_gpuDstPitchIndex[i+3]], - srcA[_gpuDstPitchIndex[i+2]], - srcA[_gpuDstPitchIndex[i+1]], - srcA[_gpuDstPitchIndex[i+0]]); + const u32 *srcA_32 = (const u32 *)srcA; + const u32 *srcB_32 = (const u32 *)srcB; + FragmentColor *dst32 = (FragmentColor *)dst; - __m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? 
_mm_load_si128((__m128i *)(srcB + i)) : _mm_set_epi16(srcB[_gpuDstPitchIndex[i+7]], - srcB[_gpuDstPitchIndex[i+6]], - srcB[_gpuDstPitchIndex[i+5]], - srcB[_gpuDstPitchIndex[i+4]], - srcB[_gpuDstPitchIndex[i+3]], - srcB[_gpuDstPitchIndex[i+2]], - srcB[_gpuDstPitchIndex[i+1]], - srcB[_gpuDstPitchIndex[i+0]]); + for (size_t i = 0; i < CAPTURELENGTH; i+=4) + { + __m128i srcA_vec128 = (CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_32 + i)) : _mm_set_epi32(srcA_32[_gpuDstPitchIndex[i+3]], + srcA_32[_gpuDstPitchIndex[i+2]], + srcA_32[_gpuDstPitchIndex[i+1]], + srcA_32[_gpuDstPitchIndex[i+0]]); + + __m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB_32 + i)) : _mm_set_epi32(srcB_32[_gpuDstPitchIndex[i+3]], + srcB_32[_gpuDstPitchIndex[i+2]], + srcB_32[_gpuDstPitchIndex[i+1]], + srcB_32[_gpuDstPitchIndex[i+0]]); + + _mm_store_si128( (__m128i *)(dst32 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<OUTPUTFORMAT>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) ); + } + } + else + { + const u16 *srcA_16 = (const u16 *)srcA; + const u16 *srcB_16 = (const u16 *)srcB; + u16 *dst16 = (u16 *)dst; - _mm_store_si128( (__m128i *)(dst + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) ); + for (size_t i = 0; i < CAPTURELENGTH; i+=8) + { + __m128i srcA_vec128 = (CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_16 + i)) : _mm_set_epi16(srcA_16[_gpuDstPitchIndex[i+7]], + srcA_16[_gpuDstPitchIndex[i+6]], + srcA_16[_gpuDstPitchIndex[i+5]], + srcA_16[_gpuDstPitchIndex[i+4]], + srcA_16[_gpuDstPitchIndex[i+3]], + srcA_16[_gpuDstPitchIndex[i+2]], + srcA_16[_gpuDstPitchIndex[i+1]], + srcA_16[_gpuDstPitchIndex[i+0]]); + + __m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? 
_mm_load_si128((__m128i *)(srcB_16 + i)) : _mm_set_epi16(srcB_16[_gpuDstPitchIndex[i+7]], + srcB_16[_gpuDstPitchIndex[i+6]], + srcB_16[_gpuDstPitchIndex[i+5]], + srcB_16[_gpuDstPitchIndex[i+4]], + srcB_16[_gpuDstPitchIndex[i+3]], + srcB_16[_gpuDstPitchIndex[i+2]], + srcB_16[_gpuDstPitchIndex[i+1]], + srcB_16[_gpuDstPitchIndex[i+0]]); + + _mm_store_si128( (__m128i *)(dst16 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) ); + } } #else for (size_t i = 0; i < CAPTURELENGTH; i++) { - const u16 colorA = (CAPTUREFROMNATIVESRCA) ? srcA[i] : srcA[_gpuDstPitchIndex[i]]; - const u16 colorB = (CAPTUREFROMNATIVESRCB) ? srcB[i] : srcB[_gpuDstPitchIndex[i]]; - - dst[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB); + if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) + { + const FragmentColor colorA = (CAPTUREFROMNATIVESRCA) ? ((const FragmentColor *)srcA)[i] : ((const FragmentColor *)srcA)[_gpuDstPitchIndex[i]]; + const FragmentColor colorB = (CAPTUREFROMNATIVESRCB) ? ((const FragmentColor *)srcB)[i] : ((const FragmentColor *)srcB)[_gpuDstPitchIndex[i]]; + + ((FragmentColor *)dst)[i] = this->_RenderLine_DispCapture_BlendFunc<OUTPUTFORMAT>(colorA, colorB, blendEVA, blendEVB); + } + else + { + const u16 colorA = (CAPTUREFROMNATIVESRCA) ? ((u16 *)srcA)[i] : ((u16 *)srcA)[_gpuDstPitchIndex[i]]; + const u16 colorB = (CAPTUREFROMNATIVESRCB) ? 
((u16 *)srcB)[i] : ((u16 *)srcB)[_gpuDstPitchIndex[i]]; + + ((u16 *)dst)[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB); + } } #endif } else { - const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); + const size_t lineWidth = GPU->GetDisplayInfo().customWidth; const size_t captureLineCount = _gpuCaptureLineCount[l]; if (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH) { - this->_RenderLine_DispCapture_BlendToCustomDstBuffer<CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt * captureLineCount, l); + this->_RenderLine_DispCapture_BlendToCustomDstBuffer<OUTPUTFORMAT, CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt * captureLineCount, l); } else { for (size_t line = 0; line < captureLineCount; line++) { - this->_RenderLine_DispCapture_BlendToCustomDstBuffer<CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt, l); - srcA += dispInfo.customWidth; - srcB += dispInfo.customWidth; - dst += dispInfo.customWidth; + this->_RenderLine_DispCapture_BlendToCustomDstBuffer<OUTPUTFORMAT, CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt, l); + srcA = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)srcA + lineWidth) : (void *)((u16 *)srcA + lineWidth); + srcB = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)srcB + lineWidth) : (void *)((u16 *)srcB + lineWidth); + dst = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? 
(void *)((FragmentColor *)dst + lineWidth) : (void *)((u16 *)dst + lineWidth); } } } Modified: trunk/desmume/src/GPU.h =================================================================== --- trunk/desmume/src/GPU.h 2016-08-06 04:09:57 UTC (rev 5524) +++ trunk/desmume/src/GPU.h 2016-08-07 00:26:31 UTC (rev 5525) @@ -1567,11 +1567,11 @@ template<NDSColorFormat COLORFORMAT> __m128i _RenderLine_DispCapture_BlendFunc_SSE2(const __m128i &srcA, const __m128i &srcB, const __m128i &blendEVA, const __m128i &blendEVB); #endif - template<bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB> - void _RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA, const u16 *srcB, u16 *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l); // Do not use restrict pointers, since srcB and dst can be the same + template<NDSColorFormat OUTPUTFORMAT, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB> + void _RenderLine_DispCapture_BlendToCustomDstBuffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l); // Do not use restrict pointers, since srcB and dst can be the same - template<size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST> - void _RenderLine_DispCapture_Blend(const u16 *srcA, const u16 *srcB, u16 *dst, const size_t captureLengthExt, const size_t l); // Do not use restrict pointers, since srcB and dst can be the same + template<NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST> + void _RenderLine_DispCapture_Blend(const void *srcA, const void *srcB, void *dst, const size_t captureLengthExt, const size_t l); // Do not use restrict pointers, since srcB and dst can be the same template<NDSColorFormat OUTPUTFORMAT> void _HandleDisplayModeVRAM(const size_t l); template<NDSColorFormat OUTPUTFORMAT> void _HandleDisplayModeMainMemory(const size_t l); This was sent 
by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-08-06 04:09:59
|
Revision: 5524 http://sourceforge.net/p/desmume/code/5524 Author: rogerman Date: 2016-08-06 04:09:57 +0000 (Sat, 06 Aug 2016) Log Message: ----------- GPU: - Fix compiling on systems that support SSE2 and not SSSE3. (Regression from r5524.) Revision Links: -------------- http://sourceforge.net/p/desmume/code/5524 Modified Paths: -------------- trunk/desmume/src/GPU.cpp Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-08-06 03:36:37 UTC (rev 5523) +++ trunk/desmume/src/GPU.cpp 2016-08-06 04:09:57 UTC (rev 5524) @@ -2242,12 +2242,12 @@ dstEffectEnableMask = _mm_shuffle_epi8(compInfo.renderState.dstBlendEnable_SSSE3, dstLayerID); dstEffectEnableMask = _mm_xor_si128( _mm_cmpeq_epi8(dstEffectEnableMask, _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF) ); #else - dstEffectEnableMask = _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), this->_dstBlendEnable_SSE2[GPULayerID_BG0]); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG1)), this->_dstBlendEnable_SSE2[GPULayerID_BG1]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG2)), this->_dstBlendEnable_SSE2[GPULayerID_BG2]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG3)), this->_dstBlendEnable_SSE2[GPULayerID_BG3]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_OBJ)), this->_dstBlendEnable_SSE2[GPULayerID_OBJ]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_Backdrop)), this->_dstBlendEnable_SSE2[GPULayerID_Backdrop]) ); + dstEffectEnableMask = _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), 
compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_BG0]); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG1)), compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_BG1]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG2)), compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_BG2]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG3)), compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_BG3]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_OBJ)), compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_OBJ]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_Backdrop)), compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_Backdrop]) ); #endif dstEffectEnableMask = _mm_andnot_si128( _mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), dstEffectEnableMask ); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-08-06 03:36:40
|
Revision: 5523 http://sourceforge.net/p/desmume/code/5523 Author: rogerman Date: 2016-08-06 03:36:37 +0000 (Sat, 06 Aug 2016) Log Message: ----------- GPU: - Do some code cleanup. Modified Paths: -------------- trunk/desmume/src/GPU.cpp trunk/desmume/src/GPU.h Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-08-04 01:08:42 UTC (rev 5522) +++ trunk/desmume/src/GPU.cpp 2016-08-06 03:36:37 UTC (rev 5523) @@ -474,8 +474,6 @@ memset(this->_renderLineLayerIDCustom, 0, dispInfo.customWidth * _gpuLargestDstLineCount * 4 * sizeof(u8)); } - this->_displayOutputMode = GPUDisplayMode_Off; - this->_enableLayer[GPULayerID_BG0] = false; this->_enableLayer[GPULayerID_BG1] = false; this->_enableLayer[GPULayerID_BG2] = false; @@ -530,13 +528,6 @@ this->_BGLayer[GPULayerID_BG2].extPalette = (u16 **)&MMU.ExtPal[this->_engineID][GPULayerID_BG2]; this->_BGLayer[GPULayerID_BG3].extPalette = (u16 **)&MMU.ExtPal[this->_engineID][GPULayerID_BG3]; - this->_mosaicWidthBG = &GPUEngineBase::_mosaicLookup.table[0][0]; - this->_mosaicHeightBG = &GPUEngineBase::_mosaicLookup.table[0][0]; - this->_mosaicWidthOBJ = &GPUEngineBase::_mosaicLookup.table[0][0]; - this->_mosaicHeightOBJ = &GPUEngineBase::_mosaicLookup.table[0][0]; - this->_isBGMosaicSet = false; - this->_isOBJMosaicSet = false; - this->_needUpdateWINH[0] = true; this->_needUpdateWINH[1] = true; @@ -551,120 +542,133 @@ this->isLineOutputNative[l] = true; } - this->_sprBoundary = 0; - this->_sprBMPBoundary = 0; + GPUEngineRenderState &renderState = this->_currentRenderState; - this->_WIN0_ENABLED = false; - this->_WIN1_ENABLED = false; - this->_WINOBJ_ENABLED = false; - this->_isAnyWindowEnabled = false; + renderState.displayOutputMode = GPUDisplayMode_Off; + renderState.selectedLayerID = GPULayerID_BG0; + renderState.selectedBGLayer = &this->_BGLayer[GPULayerID_BG0]; + renderState.backdropColor16 = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF; + 
renderState.colorEffect = (ColorEffect)this->_IORegisterMap->BLDCNT.ColorEffect; + renderState.blendEVA = 0; + renderState.blendEVB = 0; + renderState.blendEVY = 0; + renderState.blendTable555 = (TBlendTable *)&GPUEngineBase::_blendTable555[renderState.blendEVA][renderState.blendEVB][0][0]; + renderState.brightnessUpTable555 = &GPUEngineBase::_brightnessUpTable555[renderState.blendEVY][0]; + renderState.brightnessUpTable666 = &GPUEngineBase::_brightnessUpTable666[renderState.blendEVY][0]; + renderState.brightnessUpTable888 = &GPUEngineBase::_brightnessUpTable888[renderState.blendEVY][0]; + renderState.brightnessDownTable555 = &GPUEngineBase::_brightnessDownTable555[renderState.blendEVY][0]; + renderState.brightnessDownTable666 = &GPUEngineBase::_brightnessDownTable666[renderState.blendEVY][0]; + renderState.brightnessDownTable888 = &GPUEngineBase::_brightnessDownTable888[renderState.blendEVY][0]; - this->_BLDALPHA_EVA = 0; - this->_BLDALPHA_EVB = 0; - this->_BLDALPHA_EVY = 0; - this->_selectedBlendTable555 = (TBlendTable *)&GPUEngineBase::_blendTable555[this->_BLDALPHA_EVA][this->_BLDALPHA_EVB][0][0]; - this->_selectedBrightnessUpTable555 = &GPUEngineBase::_brightnessUpTable555[this->_BLDALPHA_EVY][0]; - this->_selectedBrightnessUpTable666 = &GPUEngineBase::_brightnessUpTable666[this->_BLDALPHA_EVY][0]; - this->_selectedBrightnessUpTable888 = &GPUEngineBase::_brightnessUpTable888[this->_BLDALPHA_EVY][0]; - this->_selectedBrightnessDownTable555 = &GPUEngineBase::_brightnessDownTable555[this->_BLDALPHA_EVY][0]; - this->_selectedBrightnessDownTable666 = &GPUEngineBase::_brightnessDownTable666[this->_BLDALPHA_EVY][0]; - this->_selectedBrightnessDownTable888 = &GPUEngineBase::_brightnessDownTable888[this->_BLDALPHA_EVY][0]; + renderState.srcBlendEnable[GPULayerID_BG0] = false; + renderState.srcBlendEnable[GPULayerID_BG1] = false; + renderState.srcBlendEnable[GPULayerID_BG2] = false; + renderState.srcBlendEnable[GPULayerID_BG3] = false; + 
renderState.srcBlendEnable[GPULayerID_OBJ] = false; + renderState.srcBlendEnable[GPULayerID_Backdrop] = false; - this->_srcBlendEnable[GPULayerID_BG0] = false; - this->_srcBlendEnable[GPULayerID_BG1] = false; - this->_srcBlendEnable[GPULayerID_BG2] = false; - this->_srcBlendEnable[GPULayerID_BG3] = false; - this->_srcBlendEnable[GPULayerID_OBJ] = false; - this->_srcBlendEnable[GPULayerID_Backdrop] = false; + renderState.dstBlendEnable[GPULayerID_BG0] = false; + renderState.dstBlendEnable[GPULayerID_BG1] = false; + renderState.dstBlendEnable[GPULayerID_BG2] = false; + renderState.dstBlendEnable[GPULayerID_BG3] = false; + renderState.dstBlendEnable[GPULayerID_OBJ] = false; + renderState.dstBlendEnable[GPULayerID_Backdrop] = false; - this->_dstBlendEnable[GPULayerID_BG0] = false; - this->_dstBlendEnable[GPULayerID_BG1] = false; - this->_dstBlendEnable[GPULayerID_BG2] = false; - this->_dstBlendEnable[GPULayerID_BG3] = false; - this->_dstBlendEnable[GPULayerID_OBJ] = false; - this->_dstBlendEnable[GPULayerID_Backdrop] = false; - #ifdef ENABLE_SSE2 - this->_srcBlendEnable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); - this->_srcBlendEnable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); - this->_srcBlendEnable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); - this->_srcBlendEnable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); - this->_srcBlendEnable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); - this->_srcBlendEnable_SSE2[GPULayerID_Backdrop] = _mm_setzero_si128(); + renderState.srcBlendEnable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + renderState.srcBlendEnable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + renderState.srcBlendEnable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + renderState.srcBlendEnable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + renderState.srcBlendEnable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + renderState.srcBlendEnable_SSE2[GPULayerID_Backdrop] = _mm_setzero_si128(); #ifdef ENABLE_SSSE3 - this->_dstBlendEnable_SSSE3 = _mm_setzero_si128(); + 
renderState.dstBlendEnable_SSSE3 = _mm_setzero_si128(); #else - this->_dstBlendEnable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); - this->_dstBlendEnable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); - this->_dstBlendEnable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); - this->_dstBlendEnable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); - this->_dstBlendEnable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); - this->_dstBlendEnable_SSE2[GPULayerID_Backdrop] = _mm_setzero_si128(); + renderState.dstBlendEnable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + renderState.dstBlendEnable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + renderState.dstBlendEnable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + renderState.dstBlendEnable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + renderState.dstBlendEnable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + renderState.dstBlendEnable_SSE2[GPULayerID_Backdrop] = _mm_setzero_si128(); #endif #endif - this->_WIN0_enable[GPULayerID_BG0] = 0; - this->_WIN0_enable[GPULayerID_BG1] = 0; - this->_WIN0_enable[GPULayerID_BG2] = 0; - this->_WIN0_enable[GPULayerID_BG3] = 0; - this->_WIN0_enable[GPULayerID_OBJ] = 0; - this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG] = 0; + renderState.WIN0_enable[GPULayerID_BG0] = 0; + renderState.WIN0_enable[GPULayerID_BG1] = 0; + renderState.WIN0_enable[GPULayerID_BG2] = 0; + renderState.WIN0_enable[GPULayerID_BG3] = 0; + renderState.WIN0_enable[GPULayerID_OBJ] = 0; + renderState.WIN0_enable[WINDOWCONTROL_EFFECTFLAG] = 0; - this->_WIN1_enable[GPULayerID_BG0] = 0; - this->_WIN1_enable[GPULayerID_BG1] = 0; - this->_WIN1_enable[GPULayerID_BG2] = 0; - this->_WIN1_enable[GPULayerID_BG3] = 0; - this->_WIN1_enable[GPULayerID_OBJ] = 0; - this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG] = 0; + renderState.WIN1_enable[GPULayerID_BG0] = 0; + renderState.WIN1_enable[GPULayerID_BG1] = 0; + renderState.WIN1_enable[GPULayerID_BG2] = 0; + renderState.WIN1_enable[GPULayerID_BG3] = 0; + renderState.WIN1_enable[GPULayerID_OBJ] = 0; + 
renderState.WIN1_enable[WINDOWCONTROL_EFFECTFLAG] = 0; - this->_WINOUT_enable[GPULayerID_BG0] = 0; - this->_WINOUT_enable[GPULayerID_BG1] = 0; - this->_WINOUT_enable[GPULayerID_BG2] = 0; - this->_WINOUT_enable[GPULayerID_BG3] = 0; - this->_WINOUT_enable[GPULayerID_OBJ] = 0; - this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG] = 0; + renderState.WINOUT_enable[GPULayerID_BG0] = 0; + renderState.WINOUT_enable[GPULayerID_BG1] = 0; + renderState.WINOUT_enable[GPULayerID_BG2] = 0; + renderState.WINOUT_enable[GPULayerID_BG3] = 0; + renderState.WINOUT_enable[GPULayerID_OBJ] = 0; + renderState.WINOUT_enable[WINDOWCONTROL_EFFECTFLAG] = 0; - this->_WINOBJ_enable[GPULayerID_BG0] = 0; - this->_WINOBJ_enable[GPULayerID_BG1] = 0; - this->_WINOBJ_enable[GPULayerID_BG2] = 0; - this->_WINOBJ_enable[GPULayerID_BG3] = 0; - this->_WINOBJ_enable[GPULayerID_OBJ] = 0; - this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG] = 0; + renderState.WINOBJ_enable[GPULayerID_BG0] = 0; + renderState.WINOBJ_enable[GPULayerID_BG1] = 0; + renderState.WINOBJ_enable[GPULayerID_BG2] = 0; + renderState.WINOBJ_enable[GPULayerID_BG3] = 0; + renderState.WINOBJ_enable[GPULayerID_OBJ] = 0; + renderState.WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG] = 0; #if defined(ENABLE_SSE2) - this->_WIN0_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); - this->_WIN0_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); - this->_WIN0_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); - this->_WIN0_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); - this->_WIN0_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); - this->_WIN0_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); + renderState.WIN0_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + renderState.WIN0_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + renderState.WIN0_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + renderState.WIN0_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + renderState.WIN0_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + 
renderState.WIN0_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); - this->_WIN1_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); - this->_WIN1_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); - this->_WIN1_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); - this->_WIN1_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); - this->_WIN1_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); - this->_WIN1_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); + renderState.WIN1_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + renderState.WIN1_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + renderState.WIN1_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + renderState.WIN1_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + renderState.WIN1_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + renderState.WIN1_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); - this->_WINOUT_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); - this->_WINOUT_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); - this->_WINOUT_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); - this->_WINOUT_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); - this->_WINOUT_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); - this->_WINOUT_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); + renderState.WINOUT_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + renderState.WINOUT_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + renderState.WINOUT_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + renderState.WINOUT_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + renderState.WINOUT_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + renderState.WINOUT_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); - this->_WINOBJ_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); - this->_WINOBJ_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); - this->_WINOBJ_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); - 
this->_WINOBJ_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); - this->_WINOBJ_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); - this->_WINOBJ_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); + renderState.WINOBJ_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + renderState.WINOBJ_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + renderState.WINOBJ_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + renderState.WINOBJ_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + renderState.WINOBJ_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + renderState.WINOBJ_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); #endif + renderState.WIN0_ENABLED = false; + renderState.WIN1_ENABLED = false; + renderState.WINOBJ_ENABLED = false; + renderState.isAnyWindowEnabled = false; + + renderState.mosaicWidthBG = this->_mosaicLookup.table[0]; + renderState.mosaicHeightBG = this->_mosaicLookup.table[0]; + renderState.mosaicWidthOBJ = this->_mosaicLookup.table[0]; + renderState.mosaicHeightOBJ = this->_mosaicLookup.table[0]; + renderState.isBGMosaicSet = false; + renderState.isOBJMosaicSet = false; + + renderState.spriteRenderMode = SpriteRenderMode_Sprite1D; + renderState.spriteBoundary = 0; + renderState.spriteBMPBoundary = 0; + this->_isMasterBrightFullIntensity = false; - this->_spriteRenderMode = SpriteRenderMode_Sprite1D; - this->savedBG2X.value = 0; this->savedBG2Y.value = 0; this->savedBG3X.value = 0; @@ -674,43 +678,10 @@ this->renderedHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT; this->renderedBuffer = this->nativeBuffer; - GPUEngineCompositorInfo &compState = this->_currentCompositorState; - compState.lineIndexNative = 0; - compState.lineIndexCustom = 0; - compState.lineWidthCustom = GPU_FRAMEBUFFER_NATIVE_WIDTH; - compState.lineRenderCount = 1; - compState.linePixelCount = compState.lineWidthCustom * compState.lineRenderCount; - compState.blockOffsetNative = compState.lineIndexNative * GPU_FRAMEBUFFER_NATIVE_WIDTH; - compState.blockOffsetCustom 
= compState.lineIndexCustom * compState.lineWidthCustom; - - compState.selectedLayerID = GPULayerID_BG0; - compState.selectedBGLayer = &this->_BGLayer[GPULayerID_BG0]; - compState.backdropColor16 = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF; - compState.colorEffect = (ColorEffect)this->_IORegisterMap->BLDCNT.ColorEffect; - compState.blendEVA = this->_BLDALPHA_EVA; - compState.blendEVB = this->_BLDALPHA_EVB; - compState.blendEVY = this->_BLDALPHA_EVY; - compState.blendTable555 = this->_selectedBlendTable555; - compState.brightnessUpTable555 = this->_selectedBrightnessUpTable555; - compState.brightnessUpTable666 = this->_selectedBrightnessUpTable666; - compState.brightnessUpTable888 = this->_selectedBrightnessUpTable888; - compState.brightnessDownTable555 = this->_selectedBrightnessDownTable555; - compState.brightnessDownTable666 = this->_selectedBrightnessDownTable666; - compState.brightnessDownTable888 = this->_selectedBrightnessDownTable888; - - compState.lineColorHeadNative = this->_internalRenderLineTargetNative; - compState.lineColorHeadCustom = this->_internalRenderLineTargetCustom; - compState.lineColorHead = compState.lineColorHeadNative; - compState.lineLayerIDHeadNative = this->_renderLineLayerIDNative; - compState.lineLayerIDHeadCustom = this->_renderLineLayerIDCustom; - compState.lineLayerIDHead = compState.lineLayerIDHeadNative; - - compState.xNative = 0; - compState.xCustom = 0; - compState.lineColorTarget = (void **)&compState.lineColorTarget16; - compState.lineColorTarget16 = (u16 *)compState.lineColorHeadNative; - compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHeadNative; - compState.lineLayerIDTarget = compState.lineLayerIDHead; + for (size_t line = 0; line < GPU_FRAMEBUFFER_NATIVE_HEIGHT; line++) + { + this->_currentCompositorInfo[line].renderState = renderState; + } } void GPUEngineBase::Reset() @@ -1222,37 +1193,39 @@ void GPUEngineBase::ParseReg_DISPCNT() { const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; - 
this->_displayOutputMode = (this->_engineID == GPUEngineID_Main) ? (GPUDisplayMode)DISPCNT.DisplayMode : (GPUDisplayMode)(DISPCNT.DisplayMode & 0x01); + GPUEngineRenderState &renderState = this->_currentRenderState; - this->_WIN0_ENABLED = (DISPCNT.Win0_Enable != 0); - this->_WIN1_ENABLED = (DISPCNT.Win1_Enable != 0); - this->_WINOBJ_ENABLED = (DISPCNT.WinOBJ_Enable != 0); - this->_isAnyWindowEnabled = (this->_WIN0_ENABLED || this->_WIN1_ENABLED || this->_WINOBJ_ENABLED); + renderState.displayOutputMode = (this->_engineID == GPUEngineID_Main) ? (GPUDisplayMode)DISPCNT.DisplayMode : (GPUDisplayMode)(DISPCNT.DisplayMode & 0x01); + renderState.WIN0_ENABLED = (DISPCNT.Win0_Enable != 0); + renderState.WIN1_ENABLED = (DISPCNT.Win1_Enable != 0); + renderState.WINOBJ_ENABLED = (DISPCNT.WinOBJ_Enable != 0); + renderState.isAnyWindowEnabled = (renderState.WIN0_ENABLED || renderState.WIN1_ENABLED || renderState.WINOBJ_ENABLED); + if (DISPCNT.OBJ_Tile_mapping) { //1-d sprite mapping boundaries: //32k, 64k, 128k, 256k - this->_sprBoundary = 5 + DISPCNT.OBJ_Tile_1D_Bound; + renderState.spriteBoundary = 5 + DISPCNT.OBJ_Tile_1D_Bound; //do not be deceived: even though a sprBoundary==8 (256KB region) is impossible to fully address //in GPU_SUB, it is still fully legal to address it with that granularity. 
//so don't do this: //if((gpu->core == GPU_SUB) && (cnt->OBJ_Tile_1D_Bound == 3)) gpu->sprBoundary = 7; - this->_spriteRenderMode = SpriteRenderMode_Sprite1D; + renderState.spriteRenderMode = SpriteRenderMode_Sprite1D; } else { //2d sprite mapping //boundary : 32k - this->_sprBoundary = 5; - this->_spriteRenderMode = SpriteRenderMode_Sprite2D; + renderState.spriteBoundary = 5; + renderState.spriteRenderMode = SpriteRenderMode_Sprite2D; } if (DISPCNT.OBJ_BMP_1D_Bound && (this->_engineID == GPUEngineID_Main)) - this->_sprBMPBoundary = 8; + renderState.spriteBMPBoundary = 8; else - this->_sprBMPBoundary = 7; + renderState.spriteBMPBoundary = 7; this->ParseReg_BGnCNT(GPULayerID_BG3); this->ParseReg_BGnCNT(GPULayerID_BG2); @@ -1388,35 +1361,35 @@ } template <NDSColorFormat OUTPUTFORMAT> -void GPUEngineBase::_RenderLine_Clear(GPUEngineCompositorInfo &compState) +void GPUEngineBase::_RenderLine_Clear(GPUEngineCompositorInfo &compInfo) { // Clear the current line with the clear color - u16 dstClearColor16 = compState.backdropColor16; + u16 dstClearColor16 = compInfo.renderState.backdropColor16; - if (this->_srcBlendEnable[GPULayerID_Backdrop]) + if (compInfo.renderState.srcBlendEnable[GPULayerID_Backdrop]) { - if (compState.colorEffect == ColorEffect_IncreaseBrightness) + if (compInfo.renderState.colorEffect == ColorEffect_IncreaseBrightness) { - dstClearColor16 = compState.brightnessUpTable555[compState.backdropColor16]; + dstClearColor16 = compInfo.renderState.brightnessUpTable555[compInfo.renderState.backdropColor16]; } - else if (compState.colorEffect == ColorEffect_DecreaseBrightness) + else if (compInfo.renderState.colorEffect == ColorEffect_DecreaseBrightness) { - dstClearColor16 = compState.brightnessDownTable555[compState.backdropColor16]; + dstClearColor16 = compInfo.renderState.brightnessDownTable555[compInfo.renderState.backdropColor16]; } } switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - 
memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(compState.lineColorTarget16, dstClearColor16); + memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(*compInfo.target.lineColor, dstClearColor16); break; case NDSColorFormat_BGR666_Rev: - memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(compState.lineColorTarget32, COLOR555TO666(dstClearColor16)); + memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(*compInfo.target.lineColor, COLOR555TO666(dstClearColor16)); break; case NDSColorFormat_BGR888_Rev: - memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(compState.lineColorTarget32, COLOR555TO888(dstClearColor16)); + memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(*compInfo.target.lineColor, COLOR555TO888(dstClearColor16)); break; } @@ -1434,9 +1407,6 @@ template <NDSColorFormat OUTPUTFORMAT> void GPUEngineBase::RenderLine(const u16 l) { - this->_currentCompositorState.lineIndexNative = l; - this->_currentCompositorState.lineIndexCustom = _gpuDstLineIndex[l]; - // By default, do nothing. this->UpdatePropertiesWithoutRender(l); } @@ -1668,11 +1638,11 @@ // PIXEL RENDERING /*****************************************************************************/ template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> -FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compState, const u16 srcColor16, const u8 srcAlpha) +FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo, const u16 srcColor16, const u8 srcAlpha) { - u16 &dstColor16 = *compState.lineColorTarget16; - FragmentColor &dstColor32 = *compState.lineColorTarget32; - u8 &dstLayerID = *compState.lineLayerIDTarget; + u16 &dstColor16 = *compInfo.target.lineColor16; + FragmentColor &dstColor32 = *compInfo.target.lineColor32; + u8 &dstLayerID = *compInfo.target.lineLayerID; if (ISDEBUGRENDER) { @@ -1696,12 +1666,12 @@ return; } - if ( WILLPERFORMWINDOWTEST && 
(this->_didPassWindowTestNative[compState.selectedLayerID][compState.xNative] == 0) ) + if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestNative[compInfo.renderState.selectedLayerID][compInfo.target.xNative] == 0) ) { return; } - const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[compState.selectedLayerID][compState.xNative] != 0) : true; + const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[compInfo.renderState.selectedLayerID][compInfo.target.xNative] != 0) : true; if (!ISSRCLAYEROBJ && COLOREFFECTDISABLEDHINT) { @@ -1720,18 +1690,18 @@ break; } - dstLayerID = compState.selectedLayerID; + dstLayerID = compInfo.renderState.selectedLayerID; return; } ColorEffect selectedEffect = ColorEffect_Disable; - TBlendTable *selectedBlendTable = compState.blendTable555; - u8 blendEVA = compState.blendEVA; - u8 blendEVB = compState.blendEVB; + TBlendTable *selectedBlendTable = compInfo.renderState.blendTable555; + u8 blendEVA = compInfo.renderState.blendEVA; + u8 blendEVB = compInfo.renderState.blendEVB; if (enableColorEffect) { - const bool dstEffectEnable = (dstLayerID != compState.selectedLayerID) && this->_dstBlendEnable[dstLayerID]; + const bool dstEffectEnable = (dstLayerID != compInfo.renderState.selectedLayerID) && compInfo.renderState.dstBlendEnable[dstLayerID]; // Select the color effect based on the BLDCNT target flags. 
bool forceBlendEffect = false; @@ -1739,7 +1709,7 @@ if (ISSRCLAYEROBJ) { //translucent-capable OBJ are forcing the function to blend when the second target is satisfied - const OBJMode objMode = (OBJMode)this->_sprType[compState.xNative]; + const OBJMode objMode = (OBJMode)this->_sprType[compInfo.target.xNative]; const bool isObjTranslucentType = (objMode == OBJMode_Transparent) || (objMode == OBJMode_Bitmap); if (isObjTranslucentType && dstEffectEnable) { @@ -1760,14 +1730,14 @@ { selectedEffect = ColorEffect_Blend; } - else if (this->_srcBlendEnable[compState.selectedLayerID]) + else if (compInfo.renderState.srcBlendEnable[compInfo.renderState.selectedLayerID]) { - switch (compState.colorEffect) + switch (compInfo.renderState.colorEffect) { // For the Blend effect, both first and second target flags must be checked. case ColorEffect_Blend: { - if (dstEffectEnable) selectedEffect = compState.colorEffect; + if (dstEffectEnable) selectedEffect = compInfo.renderState.colorEffect; break; } @@ -1775,7 +1745,7 @@ // Test case: Bomberman Land Touch! dialog boxes will render too dark without this check. 
case ColorEffect_IncreaseBrightness: case ColorEffect_DecreaseBrightness: - selectedEffect = compState.colorEffect; + selectedEffect = compInfo.renderState.colorEffect; break; default: @@ -1812,17 +1782,17 @@ switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - dstColor16 = compState.brightnessUpTable555[srcColor16 & 0x7FFF]; + dstColor16 = compInfo.renderState.brightnessUpTable555[srcColor16 & 0x7FFF]; dstColor16 |= 0x8000; break; case NDSColorFormat_BGR666_Rev: - dstColor32 = compState.brightnessUpTable666[srcColor16 & 0x7FFF]; + dstColor32 = compInfo.renderState.brightnessUpTable666[srcColor16 & 0x7FFF]; dstColor32.a = 0x1F; break; case NDSColorFormat_BGR888_Rev: - dstColor32 = compState.brightnessUpTable888[srcColor16 & 0x7FFF]; + dstColor32 = compInfo.renderState.brightnessUpTable888[srcColor16 & 0x7FFF]; dstColor32.a = 0xFF; break; } @@ -1834,17 +1804,17 @@ switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - dstColor16 = compState.brightnessDownTable555[srcColor16 & 0x7FFF]; + dstColor16 = compInfo.renderState.brightnessDownTable555[srcColor16 & 0x7FFF]; dstColor16 |= 0x8000; break; case NDSColorFormat_BGR666_Rev: - dstColor32 = compState.brightnessDownTable666[srcColor16 & 0x7FFF]; + dstColor32 = compInfo.renderState.brightnessDownTable666[srcColor16 & 0x7FFF]; dstColor32.a = 0x1F; break; case NDSColorFormat_BGR888_Rev: - dstColor32 = compState.brightnessDownTable888[srcColor16 & 0x7FFF]; + dstColor32 = compInfo.renderState.brightnessDownTable888[srcColor16 & 0x7FFF]; dstColor32.a = 0xFF; break; } @@ -1878,13 +1848,13 @@ } } - dstLayerID = compState.selectedLayerID; + dstLayerID = compInfo.renderState.selectedLayerID; } #ifdef ENABLE_SSE2 template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> -FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(GPUEngineCompositorInfo &compState, +FORCEINLINE void 
GPUEngineBase::_RenderPixel16_SSE2(GPUEngineCompositorInfo &compInfo, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, const __m128i &srcAlpha, const __m128i &srcEffectEnableMask, @@ -1892,7 +1862,7 @@ __m128i &dstLayerID, __m128i &passMask8) { - const __m128i srcLayerID_vec128 = _mm_set1_epi8(compState.selectedLayerID); + const __m128i srcLayerID_vec128 = _mm_set1_epi8(compInfo.renderState.selectedLayerID); __m128i passMask16[2] = { _mm_unpacklo_epi8(passMask8, passMask8), _mm_unpackhi_epi8(passMask8, passMask8) }; @@ -1928,12 +1898,12 @@ if (WILLPERFORMWINDOWTEST) { // Do the window test. - __m128i didPassWindowTest = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_didPassWindowTestCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); + __m128i didPassWindowTest = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_didPassWindowTestCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom)), _mm_set1_epi8(1) ); passMask8 = _mm_and_si128(passMask8, didPassWindowTest); passMask16[0] = _mm_unpacklo_epi8(passMask8, passMask8); passMask16[1] = _mm_unpackhi_epi8(passMask8, passMask8); - enableColorEffectMask = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_enableColorEffectCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); + enableColorEffectMask = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_enableColorEffectCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom)), _mm_set1_epi8(1) ); } else { @@ -1964,30 +1934,30 @@ __m128i dstEffectEnableMask; #ifdef ENABLE_SSSE3 - dstEffectEnableMask = _mm_shuffle_epi8(this->_dstBlendEnable_SSSE3, dstLayerID); + dstEffectEnableMask = _mm_shuffle_epi8(compInfo.renderState.dstBlendEnable_SSSE3, dstLayerID); dstEffectEnableMask = _mm_xor_si128( _mm_cmpeq_epi8(dstEffectEnableMask, _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF) ); #else - dstEffectEnableMask = _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, 
_mm_set1_epi8(GPULayerID_BG0)), this->_dstBlendEnable_SSE2[GPULayerID_BG0]); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG1)), this->_dstBlendEnable_SSE2[GPULayerID_BG1]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG2)), this->_dstBlendEnable_SSE2[GPULayerID_BG2]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG3)), this->_dstBlendEnable_SSE2[GPULayerID_BG3]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_OBJ)), this->_dstBlendEnable_SSE2[GPULayerID_OBJ]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_Backdrop)), this->_dstBlendEnable_SSE2[GPULayerID_Backdrop]) ); + dstEffectEnableMask = _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_BG0]); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG1)), compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_BG1]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG2)), compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_BG2]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG3)), compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_BG3]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_OBJ)), compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_OBJ]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, 
_mm_set1_epi8(GPULayerID_Backdrop)), compInfo.renderState.dstBlendEnable_SSE2[GPULayerID_Backdrop]) ); #endif dstEffectEnableMask = _mm_andnot_si128( _mm_cmpeq_epi8(dstLayerID, srcLayerID_vec128), dstEffectEnableMask ); // Select the color effect based on the BLDCNT target flags. __m128i forceBlendEffectMask = _mm_setzero_si128(); - const __m128i colorEffect_vec128 = (WILLPERFORMWINDOWTEST) ? _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(compState.colorEffect), enableColorEffectMask) : _mm_set1_epi8(compState.colorEffect); + const __m128i colorEffect_vec128 = (WILLPERFORMWINDOWTEST) ? _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(compInfo.renderState.colorEffect), enableColorEffectMask) : _mm_set1_epi8(compInfo.renderState.colorEffect); - __m128i eva_vec128 = _mm_set1_epi16(compState.blendEVA); - __m128i evb_vec128 = _mm_set1_epi16(compState.blendEVB); - const __m128i evy_vec128 = _mm_set1_epi16(compState.blendEVY); + __m128i eva_vec128 = _mm_set1_epi16(compInfo.renderState.blendEVA); + __m128i evb_vec128 = _mm_set1_epi16(compInfo.renderState.blendEVB); + const __m128i evy_vec128 = _mm_set1_epi16(compInfo.renderState.blendEVY); if (ISSRCLAYEROBJ) { - const __m128i objMode_vec128 = _mm_loadu_si128((__m128i *)(this->_sprType + compState.xNative)); + const __m128i objMode_vec128 = _mm_loadu_si128((__m128i *)(this->_sprType + compInfo.target.xNative)); const __m128i isObjTranslucentMask = _mm_and_si128( dstEffectEnableMask, _mm_or_si128(_mm_cmpeq_epi8(objMode_vec128, _mm_set1_epi8(OBJMode_Transparent)), _mm_cmpeq_epi8(objMode_vec128, _mm_set1_epi8(OBJMode_Bitmap))) ); forceBlendEffectMask = isObjTranslucentMask; @@ -1999,7 +1969,7 @@ __m128i tmpSrc[4] = {src0, src1, src2, src3}; - switch (compState.colorEffect) + switch (compInfo.renderState.colorEffect) { case ColorEffect_IncreaseBrightness: { @@ -2112,16 +2082,16 @@ // However, GPUEngineBase::_RenderPixel() takes source pixels in RGB555. 
In order to unify the methods, all pixels // must be processed in RGBA6665. template<NDSColorFormat OUTPUTFORMAT> -FORCEINLINE void GPUEngineBase::_RenderPixel3D(GPUEngineCompositorInfo &compState, const bool enableColorEffect, const FragmentColor srcColor32) +FORCEINLINE void GPUEngineBase::_RenderPixel3D(GPUEngineCompositorInfo &compInfo, const bool enableColorEffect, const FragmentColor srcColor32) { - u16 &dstColor16 = *compState.lineColorTarget16; - FragmentColor &dstColor32 = *compState.lineColorTarget32; - u8 &dstLayerID = *compState.lineLayerIDTarget; + u16 &dstColor16 = *compInfo.target.lineColor16; + FragmentColor &dstColor32 = *compInfo.target.lineColor32; + u8 &dstLayerID = *compInfo.target.lineLayerID; ColorEffect selectedEffect = ColorEffect_Disable; if (enableColorEffect) { - const bool dstEffectEnable = (dstLayerID != GPULayerID_BG0) && this->_dstBlendEnable[dstLayerID]; + const bool dstEffectEnable = (dstLayerID != GPULayerID_BG0) && compInfo.renderState.dstBlendEnable[dstLayerID]; // Select the color effect based on the BLDCNT target flags. bool forceBlendEffect = false; @@ -2135,14 +2105,14 @@ { selectedEffect = ColorEffect_Blend; } - else if (this->_srcBlendEnable[GPULayerID_BG0]) + else if (compInfo.renderState.srcBlendEnable[GPULayerID_BG0]) { - switch (compState.colorEffect) + switch (compInfo.renderState.colorEffect) { // For the Blend effect, both first and second target flags must be checked. case ColorEffect_Blend: { - if (dstEffectEnable) selectedEffect = compState.colorEffect; + if (dstEffectEnable) selectedEffect = compInfo.renderState.colorEffect; break; } @@ -2150,7 +2120,7 @@ // Test case: Bomberman Land Touch! dialog boxes will render too dark without this check. 
case ColorEffect_IncreaseBrightness: case ColorEffect_DecreaseBrightness: - selectedEffect = compState.colorEffect; + selectedEffect = compInfo.renderState.colorEffect; break; default: @@ -2171,11 +2141,11 @@ break; case ColorEffect_IncreaseBrightness: - dstColor16 = compState.brightnessUpTable555[srcColor16 & 0x7FFF]; + dstColor16 = compInfo.renderState.brightnessUpTable555[srcColor16 & 0x7FFF]; break; case ColorEffect_DecreaseBrightness: - dstColor16 = compState.brightnessDownTable555[srcColor16 & 0x7FFF]; + dstColor16 = compInfo.renderState.brightnessDownTable555[srcColor16 & 0x7FFF]; break; case ColorEffect_Blend: @@ -2194,11 +2164,11 @@ break; case ColorEffect_IncreaseBrightness: - dstColor32 = this->_ColorEffectIncreaseBrightness<OUTPUTFORMAT>(srcColor32, compState.blendEVY); + dstColor32 = this->_ColorEffectIncreaseBrightness<OUTPUTFORMAT>(srcColor32, compInfo.renderState.blendEVY); break; case ColorEffect_DecreaseBrightness: - dstColor32 = this->_ColorEffectDecreaseBrightness(srcColor32, compState.blendEVY); + dstColor32 = this->_ColorEffectDecreaseBrightness(srcColor32, compInfo.renderState.blendEVY); break; case ColorEffect_Blend: @@ -2215,7 +2185,7 @@ #ifdef ENABLE_SSE2 template <NDSColorFormat OUTPUTFORMAT> -FORCEINLINE void GPUEngineBase::_RenderPixel3D_SSE2(GPUEngineCompositorInfo &compState, +FORCEINLINE void GPUEngineBase::_RenderPixel3D_SSE2(GPUEngineCompositorInfo &compInfo, const __m128i &passMask8, const __m128i &enableColorEffectMask, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, @@ -2265,11 +2235,11 @@ tmpSrc[3] = src3; } - const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[GPULayerID_BG0]; + const __m128i srcEffectEnableMask = compInfo.renderState.srcBlendEnable_SSE2[GPULayerID_BG0]; __m128i dstEffectEnableMask; #ifdef ENABLE_SSSE3 - dstEffectEnableMask = _mm_shuffle_epi8(this->_dstBlendEnable_SSSE3, dstLayerID); + dstEffectEnableMask = 
_mm_shuffle_epi8(compInfo.renderState.dstBlendEnable_SSSE3, dstLayerID); dstEffectEnableMask = _mm_xor_si128( _mm_cmpeq_epi8(dstEffectEnableMask, _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF) ); #else dstEffectEnableMask = _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), this->_dstBlendEnable_SSE2[GPULayerID_BG0]); @@ -2283,11 +2253,11 @@ dstEffectEnableMask = _mm_andnot_si128( _mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), dstEffectEnableMask ); // Select the color effect based on the BLDCNT target flags. - const __m128i colorEffect_vec128 = _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(compState.colorEffect), enableColorEffectMask); + const __m128i colorEffect_vec128 = _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(compInfo.renderState.colorEffect), enableColorEffectMask); const __m128i forceBlendEffectMask = _mm_and_si128(enableColorEffectMask, dstEffectEnableMask); - const __m128i evy_vec128 = _mm_set1_epi16(compState.blendEVY); + const __m128i evy_vec128 = _mm_set1_epi16(compInfo.renderState.blendEVY); - switch (compState.colorEffect) + switch (compInfo.renderState.colorEffect) { case ColorEffect_IncreaseBrightness: { @@ -2398,7 +2368,7 @@ //this is fantastically inaccurate. 
//we do the early return even though it reduces the resulting accuracy //because we need the speed, and because it is inaccurate anyway -void GPUEngineBase::_MosaicSpriteLinePixel(GPUEngineCompositorInfo &compState, const size_t x, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab) +void GPUEngineBase::_MosaicSpriteLinePixel(GPUEngineCompositorInfo &compInfo, const size_t x, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab) { const bool enableMosaic = (this->_oamList[this->_sprNum[x]].Mosaic != 0); if (!enableMosaic) @@ -2411,11 +2381,11 @@ objColor.alpha = dst_alpha[x]; objColor.opaque = opaque; - const size_t y = compState.lineIndexNative; + const size_t y = compInfo.line.indexNative; - if (!this->_mosaicWidthOBJ[x].begin || !this->_mosaicHeightOBJ[y].begin) + if (!compInfo.renderState.mosaicWidthOBJ[x].begin || !compInfo.renderState.mosaicHeightOBJ[y].begin) { - objColor = this->_mosaicColors.obj[this->_mosaicWidthOBJ[x].trunc]; + objColor = this->_mosaicColors.obj[compInfo.renderState.mosaicWidthOBJ[x].trunc]; } this->_mosaicColors.obj[x] = objColor; @@ -2425,27 +2395,27 @@ if (!objColor.opaque) prioTab[x] = 0x7F; } -void GPUEngineBase::_MosaicSpriteLine(GPUEngineCompositorInfo &compState, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab) +void GPUEngineBase::_MosaicSpriteLine(GPUEngineCompositorInfo &compInfo, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab) { - if (!this->_isOBJMosaicSet) + if (!compInfo.renderState.isOBJMosaicSet) { return; } for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++) { - this->_MosaicSpriteLinePixel(compState, i, dst, dst_alpha, typeTab, prioTab); + this->_MosaicSpriteLinePixel(compInfo, i, dst, dst_alpha, typeTab, prioTab); } } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool 
COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> -void GPUEngineBase::_RenderPixelIterate_Final(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) +void GPUEngineBase::_RenderPixelIterate_Final(GPUEngineCompositorInfo &compInfo, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { - const u16 lineWidth = (ISDEBUGRENDER) ? compState.selectedBGLayer->size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; + const u16 lineWidth = (ISDEBUGRENDER) ? compInfo.renderState.selectedBGLayer->size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; const s16 dx = (s16)LOCAL_TO_LE_16(param.BGnPA.value); const s16 dy = (s16)LOCAL_TO_LE_16(param.BGnPC.value); - const s32 wh = compState.selectedBGLayer->size.width; - const s32 ht = compState.selectedBGLayer->size.height; + const s32 wh = compInfo.renderState.selectedBGLayer->size.width; + const s32 ht = compInfo.renderState.selectedBGLayer->size.height; const s32 wmask = wh - 1; const s32 hmask = ht - 1; @@ -2485,7 +2455,7 @@ } else { - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, i, srcColor, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compInfo, i, srcColor, (index != 0)); } auxX++; @@ -2516,28 +2486,28 @@ } else { - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, i, srcColor, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compInfo, i, srcColor, (index != 0)); } } } } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> -void 
GPUEngineBase::_RenderPixelIterate_ApplyWrap(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) +void GPUEngineBase::_RenderPixelIterate_ApplyWrap(GPUEngineCompositorInfo &compInfo, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { - this->_RenderPixelIterate_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, WRAP>(compState, param, map, tile, pal); + this->_RenderPixelIterate_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, WRAP>(compInfo, param, map, tile, pal); } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> -void GPUEngineBase::_RenderPixelIterate(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) +void GPUEngineBase::_RenderPixelIterate(GPUEngineCompositorInfo &compInfo, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { - if (compState.selectedBGLayer->isDisplayWrapped) + if (compInfo.renderState.selectedBGLayer->isDisplayWrapped) { - this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, true>(compState, param, map, tile, pal); + this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, true>(compInfo, param, map, tile, pal); } else { - this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, false>(compState, param, map, tile, pal); + 
this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, false>(compInfo, param, map, tile, pal); } } @@ -2554,15 +2524,15 @@ } template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> -FORCEINLINE void GPUEngineBase::_RenderPixelSingle(GPUEngineCompositorInfo &compState, const size_t srcX, u16 srcColor16, const bool opaque) +FORCEINLINE void GPUEngineBase::_RenderPixelSingle(GPUEngineCompositorInfo &compInfo, const size_t srcX, u16 srcColor16, const bool opaque) { bool willRenderColor = opaque; - compState.xNative = srcX; - compState.xCustom = _gpuDstPitchIndex[srcX]; - compState.lineLayerIDTarget = compState.lineLayerIDHeadNative + srcX; - compState.lineColorTarget16 = (u16 *)compState.lineColorHeadNative + srcX; - compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHeadNative + srcX; + compInfo.target.xNative = srcX; + compInfo.target.xCustom = _gpuDstPitchIndex[srcX]; + compInfo.target.lineLayerID = compInfo.target.lineLayerIDHeadNative + srcX; + compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHeadNative + srcX; + compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHeadNative + srcX; if (MOSAIC) { @@ -2572,32 +2542,32 @@ if (!opaque) srcColor16 = 0xFFFF; else srcColor16 &= 0x7FFF; - if (!this->_mosaicWidthBG[srcX].begin || !this->_mosaicHeightBG[compState.lineIndexNative].begin) + if (!compInfo.renderState.mosaicWidthBG[srcX].begin || !compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin) { - srcColor16 = this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[srcX].trunc]; + srcColor16 = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][compInfo.renderState.mosaicWidthBG[srcX].trunc]; } - this->_mosaicColors.bg[compState.selectedLayerID][srcX] = srcColor16; + 
this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][srcX] = srcColor16; willRenderColor = (srcColor16 != 0xFFFF); } if (willRenderColor) { - this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, - srcColor16, - 0); + this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compInfo, + srcColor16, + 0); } } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> -void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compState) +void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compInfo) { #ifdef ENABLE_SSE2 #ifdef ENABLE_SSSE3 - const bool isIntegerScale = ((compState.lineWidthCustom % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0); - const size_t scale = compState.lineWidthCustom / GPU_FRAMEBUFFER_NATIVE_WIDTH; + const bool isIntegerScale = ((compInfo.line.widthCustom % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0); + const size_t scale = compInfo.line.widthCustom / GPU_FRAMEBUFFER_NATIVE_WIDTH; #endif for (size_t x = 0, dstIdx = 0; x < GPU_FRAMEBUFFER_NATIVE_WIDTH; x+=8) @@ -2610,20 +2580,21 @@ const __m128i idxMask = _mm_cmpeq_epi16(_mm_unpacklo_epi8(index_vec128, _mm_setzero_si128()), _mm_setzero_si128()); const __m128i tmpColor_vec128 = _mm_blendv_epi8(_mm_and_si128(col_vec128, _mm_set1_epi16(0x7FFF)), _mm_set1_epi16(0xFFFF), idxMask); - const __m128i mosaicWidthMask = _mm_cmpeq_epi16( _mm_and_si128(_mm_set1_epi16(0x00FF), _mm_loadu_si128((__m128i *)(this->_mosaicWidthBG + x))), _mm_setzero_si128() ); - const __m128i mosaicHeightMask = _mm_cmpeq_epi16(_mm_set1_epi16(this->_mosaicHeightBG[compState.lineIndexNative].begin), _mm_setzero_si128()); + const __m128i mosaicWidthMask = _mm_cmpeq_epi16( _mm_and_si128(_mm_set1_epi16(0x00FF), _mm_loadu_si128((__m128i *)(compInfo.renderState.mosaicWidthBG + x))), _mm_setzero_si128() ); + const __m128i mosaicHeightMask = 
_mm_cmpeq_epi16(_mm_set1_epi16(compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin), _mm_setzero_si128()); const __m128i mosaicMask = _mm_or_si128(mosaicWidthMask, mosaicHeightMask); - this->_mosaicColors.bg[compState.selectedLayerID][x+0] = (_mm_extract_epi16(mosaicMask, 0) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+0].trunc] : _mm_extract_epi16(tmpColor_vec128, 0); - this->_mosaicColors.bg[compState.selectedLayerID][x+1] = (_mm_extract_epi16(mosaicMask, 1) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+1].trunc] : _mm_extract_epi16(tmpColor_vec128, 1); - this->_mosaicColors.bg[compState.selectedLayerID][x+2] = (_mm_extract_epi16(mosaicMask, 2) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+2].trunc] : _mm_extract_epi16(tmpColor_vec128, 2); - this->_mosaicColors.bg[compState.selectedLayerID][x+3] = (_mm_extract_epi16(mosaicMask, 3) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+3].trunc] : _mm_extract_epi16(tmpColor_vec128, 3); - this->_mosaicColors.bg[compState.selectedLayerID][x+4] = (_mm_extract_epi16(mosaicMask, 4) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+4].trunc] : _mm_extract_epi16(tmpColor_vec128, 4); - this->_mosaicColors.bg[compState.selectedLayerID][x+5] = (_mm_extract_epi16(mosaicMask, 5) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+5].trunc] : _mm_extract_epi16(tmpColor_vec128, 5); - this->_mosaicColors.bg[compState.selectedLayerID][x+6] = (_mm_extract_epi16(mosaicMask, 6) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+6].trunc] : _mm_extract_epi16(tmpColor_vec128, 6); - this->_mosaicColors.bg[compState.selectedLayerID][x+7] = (_mm_extract_epi16(mosaicMask, 7) != 0) ? 
this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+7].trunc] : _mm_extract_epi16(tmpColor_vec128, 7); + u16 *mosaicColorBG = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID]; + mosaicColorBG[x+0] = (_mm_extract_epi16(mosaicMask, 0) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+0].trunc] : _mm_extract_epi16(tmpColor_vec128, 0); + mosaicColorBG[x+1] = (_mm_extract_epi16(mosaicMask, 1) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+1].trunc] : _mm_extract_epi16(tmpColor_vec128, 1); + mosaicColorBG[x+2] = (_mm_extract_epi16(mosaicMask, 2) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+2].trunc] : _mm_extract_epi16(tmpColor_vec128, 2); + mosaicColorBG[x+3] = (_mm_extract_epi16(mosaicMask, 3) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+3].trunc] : _mm_extract_epi16(tmpColor_vec128, 3); + mosaicColorBG[x+4] = (_mm_extract_epi16(mosaicMask, 4) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+4].trunc] : _mm_extract_epi16(tmpColor_vec128, 4); + mosaicColorBG[x+5] = (_mm_extract_epi16(mosaicMask, 5) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+5].trunc] : _mm_extract_epi16(tmpColor_vec128, 5); + mosaicColorBG[x+6] = (_mm_extract_epi16(mosaicMask, 6) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+6].trunc] : _mm_extract_epi16(tmpColor_vec128, 6); + mosaicColorBG[x+7] = (_mm_extract_epi16(mosaicMask, 7) != 0) ? 
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+7].trunc] : _mm_extract_epi16(tmpColor_vec128, 7); - const __m128i mosaicColor_vec128 = _mm_loadu_si128((__m128i *)(this->_mosaicColors.bg[compState.selectedLayerID] + x)); + const __m128i mosaicColor_vec128 = _mm_loadu_si128((__m128i *)(mosaicColorBG + x)); const __m128i mosaicColorMask = _mm_cmpeq_epi16(mosaicColor_vec128, _mm_set1_epi16(0xFFFF)); _mm_storel_epi64( (__m128i *)(this->_bgLayerIndex + x), _mm_andnot_si128(_mm_packs_epi16(mosaicColorMask, _mm_setzero_si128()), index_vec128) ); _mm_store_si128( (__m128i *)(this->_bgLayerColor + x), _mm_blendv_epi8(mosaicColor_vec128, col_vec128, mosaicColorMask) ); @@ -2666,12 +2637,12 @@ { u16 tmpColor = (this->_bgLayerIndex[x] == 0) ? 0xFFFF : this->_bgLayerColor[x] & 0x7FFF; - if (!this->_mosaicWidthBG[x].begin || !this->_mosaicHeightBG[compState.lineIndexNative].begin) + if (!compInfo.renderState.mosaicWidthBG[x].begin || !compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin) { - tmpColor = this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x].trunc]; + tmpColor = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][compInfo.renderState.mosaicWidthBG[x].trunc]; } - this->_mosaicColors.bg[compState.selectedLayerID][x] = tmpColor; + this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][x] = tmpColor; if (tmpColor == 0xFFFF) { @@ -2691,36 +2662,36 @@ } #endif - compState.lineColorTarget16 = (u16 *)compState.lineColorHead; - compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHead; - compState.lineLayerIDTarget = compState.lineLayerIDHead; + compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead; + compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead; + compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; #ifdef ENABLE_SSE2 - const size_t ssePixCount = (compState.lineWidthCustom - (compState.lineWidthCustom % 16)); - const __m128i srcEffectEnableMask = 
this->_srcBlendEnable_SSE2[compState.selectedLayerID]; + const size_t ssePixCount = (compInfo.line.widthCustom - (compInfo.line.widthCustom % 16)); + const __m128i srcEffectEnableMask = compInfo.renderState.srcBlendEnable_SSE2[compInfo.renderState.selectedLayerID]; #endif - for (size_t l = 0; l < compState.lineRenderCount; l++) + for (size_t l = 0; l < compInfo.line.renderCount; l++) { - compState.xNative = 0; - compState.xCustom = 0; + compInfo.target.xNative = 0; + compInfo.target.xCustom = 0; #ifdef ENABLE_SSE2 - for (; compState.xCustom < ssePixCount; compState.xCustom+=16, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16+=16, compState.lineColorTarget32+=16, compState.lineLayerIDTarget+=16) + for (; compInfo.target.xCustom < ssePixCount; compInfo.target.xCustom+=16, compInfo.target.xNative = _gpuDstToSrcIndex[compInfo.target.xCustom], compInfo.target.lineColor16+=16, compInfo.target.lineColor32+=16, compInfo.target.lineLayerID+=16) { __m128i src[4]; if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { - src[0] = _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compState.xCustom + 0)); - src[1] = _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compState.xCustom + 8)); + src[0] = _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compInfo.target.xCustom + 0)); + src[1] = _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compInfo.target.xCustom + 8)); src[2] = _mm_setzero_si128(); src[3] = _mm_setzero_si128(); } else { - const __m128i src16[2] = { _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compState.xCustom + 0)), - _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compState.xCustom + 8)) }; + const __m128i src16[2] = { _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compInfo.target.xCustom + 0)), + _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compInfo.target.xCustom + 8)) }; if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { @@ -2736,12 +2707,12 @@ const __m128i srcAlpha = 
_mm_setzero_si128(); - __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)compState.lineLayerIDTarget); - __m128i passMask8 = _mm_xor_si128( _mm_cmpeq_epi8(_mm_load_si128((__m128i *)(this->_bgLayerIndexCustom + compState.xCustom)), _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF) ); + __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)compInfo.target.lineLayerID); + __m128i passMask8 = _mm_xor_si128( _mm_cmpeq_epi8(_mm_load_si128((__m128i *)(this->_bgLayerIndexCustom + compInfo.target.xCustom)), _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF) ); __m128i dst[4]; - dst[0] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 0); - dst[1] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 1); + dst[0] = _mm_load_si128((__m128i *)*compInfo.target.lineColor + 0); + dst[1] = _mm_load_si128((__m128i *)*compInfo.target.lineColor + 1); if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { @@ -2750,65 +2721,65 @@ } else { - dst[2] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 2); - dst[3] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 3); + dst[2] = _mm_load_si128((__m128i *)*compInfo.target.lineColor + 2); + dst[3] = _mm_load_si128((__m128i *)*compInfo.target.lineColor + 3); } - this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, true>(compState, + this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, true>(compInfo, src[3], src[2], src[1], src[0], srcAlpha, srcEffectEnableMask, dst[3], dst[2], dst[1], dst[0], dstLayerID_vec128, passMask8); - _mm_store_si128((__m128i *)*compState.lineColorTarget + 0, dst[0]); - _mm_store_si128((__m128i *)*compState.lineColorTarget + 1, dst[1]); + _mm_store_si128((__m128i *)*compInfo.target.lineColor + 0, dst[0]); + _mm_store_si128((__m128i *)*compInfo.target.lineColor + 1, dst[1]); if (OUTPUTFORMAT != NDSColorFormat_BGR555_Rev) { - _mm_store_si128((__m128i *)*compState.lineColorTarget + 2, 
dst[2]); - _mm_store_si128((__m128i *)*compState.lineColorTarget + 3, dst[3]); + _mm_store_si128((__m128i *)*compInfo.target.lineColor + 2, dst[2]); + _mm_store_si128((__m128i *)*compInfo.target.lineColor + 3, dst[3]); } - _mm_store_si128((__m128i *)compState.lineLayerIDTarget, dstLayerID_vec128); + _mm_store_si128((__m128i *)compInfo.target.lineLayerID, dstLayerID_vec128); } #endif #ifdef ENABLE_SSE2 #pragma LOOPVECTORIZE_DISABLE #endif - for (; compState.xCustom < compState.lineWidthCustom; compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) + for (; compInfo.target.xCustom < compInfo.line.widthCustom; compInfo.target.xCustom++, compInfo.target.xNative = _gpuDstToSrcIndex[compInfo.target.xCustom], compInfo.target.lineColor16++, compInfo.target.lineColor32++, compInfo.target.lineLayerID++) { - if (this->_bgLayerIndexCustom[compState.xCustom] == 0) + if (this->_bgLayerIndexCustom[compInfo.target.xCustom] == 0) { continue; } - this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, - this->_bgLayerColorCustom[compState.xCustom], + this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compInfo, + this->_bgLayerColorCustom[compInfo.target.xCustom], 0); } } } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> -void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compState) +void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compInfo) { - const u16 *__restrict srcLine = GPU->GetCustomVRAMAddressUsingMappedAddress(compState.selectedBGLayer->BMPAddress) + compState.blockOffsetCustom; + const u16 *__restrict srcLine = GPU->GetCustomVRAMAddressUsingMappedAddress(compInfo.renderState.selectedBGLayer->BMPAddress) + 
compInfo.line.blockOffsetCustom; - compState.xNative = 0; - compState.xCustom = 0; - compState.lineColorTarget16 = (u16 *)compState.lineColorHead; - compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHead; - compState.lineLayerIDTarget = compState.lineLayerIDHead; + compInfo.target.xNative = 0; + compInfo.target.xCustom = 0; + compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead; + compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead; + compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead; size_t i = 0; #ifdef ENABLE_SSE2 - const __m128i srcEff... [truncated message content] |
From: <rog...@us...> - 2016-08-04 01:08:44
|
Revision: 5522 http://sourceforge.net/p/desmume/code/5522 Author: rogerman Date: 2016-08-04 01:08:42 +0000 (Thu, 04 Aug 2016) Log Message: ----------- GPU: - Fix bug where the composited 3D layer could have incorrect colors on non-SSE2 systems. (Regression from r5509.) Revision Links: -------------- http://sourceforge.net/p/desmume/code/5509 Modified Paths: -------------- trunk/desmume/src/GPU.cpp Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-08-03 18:14:44 UTC (rev 5521) +++ trunk/desmume/src/GPU.cpp 2016-08-04 01:08:42 UTC (rev 5522) @@ -2171,11 +2171,11 @@ break; case ColorEffect_IncreaseBrightness: - dstColor16 = compState.brightnessUpTable555[srcColor16]; + dstColor16 = compState.brightnessUpTable555[srcColor16 & 0x7FFF]; break; case ColorEffect_DecreaseBrightness: - dstColor16 = compState.brightnessDownTable555[srcColor16]; + dstColor16 = compState.brightnessDownTable555[srcColor16 & 0x7FFF]; break; case ColorEffect_Blend: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-08-03 18:14:46
|
Revision: 5521 http://sourceforge.net/p/desmume/code/5521 Author: rogerman Date: 2016-08-03 18:14:44 +0000 (Wed, 03 Aug 2016) Log Message: ----------- GPU: - For SSE2 systems, when reading graphics data from main memory, eliminate the extraneous pshufd instruction. Modified Paths: -------------- trunk/desmume/src/GPU.cpp Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-31 21:10:14 UTC (rev 5520) +++ trunk/desmume/src/GPU.cpp 2016-08-03 18:14:44 UTC (rev 5521) @@ -5972,8 +5972,8 @@ #ifdef ENABLE_SSE2 for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(__m128i); i++) { - __m128i fifoColor = _mm_set_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); - _mm_store_si128((__m128i *)fifoLineBuffer + i, _mm_shuffle_epi32(fifoColor, 0x1B)); // We need to shuffle the four FIFO values back into the correct order, since they were originally loaded in reverse order. + const __m128i fifoColor = _mm_setr_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); + _mm_store_si128((__m128i *)fifoLineBuffer + i, fifoColor); } #else for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++) @@ -6592,8 +6592,7 @@ const __m128i alphaBit = _mm_set1_epi16(0x8000); for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(__m128i); i++) { - __m128i fifoColor = _mm_set_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); - fifoColor = _mm_shuffle_epi32(fifoColor, 0x1B); // We need to shuffle the four FIFO values back into the correct order, since they were originally loaded in reverse order. 
+ const __m128i fifoColor = _mm_setr_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); _mm_store_si128((__m128i *)dst + i, _mm_or_si128(fifoColor, alphaBit)); } #else This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-07-31 21:10:16
|
Revision: 5520 http://sourceforge.net/p/desmume/code/5520 Author: rogerman Date: 2016-07-31 21:10:14 +0000 (Sun, 31 Jul 2016) Log Message: ----------- GPU: - Fix compiling issue with non-SSE2. (Regression from r5512.) Revision Links: -------------- http://sourceforge.net/p/desmume/code/5512 Modified Paths: -------------- trunk/desmume/src/GPU.cpp Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-30 01:50:54 UTC (rev 5519) +++ trunk/desmume/src/GPU.cpp 2016-07-31 21:10:14 UTC (rev 5520) @@ -2666,12 +2666,12 @@ { u16 tmpColor = (this->_bgLayerIndex[x] == 0) ? 0xFFFF : this->_bgLayerColor[x] & 0x7FFF; - if (!this->_mosaicWidthBG[x].begin || !this->_mosaicHeightBG[lineIndex].begin) + if (!this->_mosaicWidthBG[x].begin || !this->_mosaicHeightBG[compState.lineIndexNative].begin) { - tmpColor = this->_mosaicColors.bg[LAYERID][this->_mosaicWidthBG[x].trunc]; + tmpColor = this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x].trunc]; } - this->_mosaicColors.bg[LAYERID][x] = tmpColor; + this->_mosaicColors.bg[compState.selectedLayerID][x] = tmpColor; if (tmpColor == 0xFFFF) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-07-30 01:50:56
|
Revision: 5519 http://sourceforge.net/p/desmume/code/5519 Author: rogerman Date: 2016-07-30 01:50:54 +0000 (Sat, 30 Jul 2016) Log Message: ----------- Cocoa Port: - Do some minor code cleanup. Modified Paths: -------------- trunk/desmume/src/cocoa/OGLDisplayOutput.cpp trunk/desmume/src/cocoa/OGLDisplayOutput.h Modified: trunk/desmume/src/cocoa/OGLDisplayOutput.cpp =================================================================== --- trunk/desmume/src/cocoa/OGLDisplayOutput.cpp 2016-07-30 01:49:47 UTC (rev 5518) +++ trunk/desmume/src/cocoa/OGLDisplayOutput.cpp 2016-07-30 01:50:54 UTC (rev 5519) @@ -6227,16 +6227,16 @@ glGenBuffersARB(1, &_vboElementID); glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboVertexID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(GLfloat) * 4096 * (2 * 4), NULL, GL_STREAM_DRAW_ARB); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW_ARB); glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboTexCoordID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(GLfloat) * 4096 * (2 * 4), NULL, GL_STREAM_DRAW_ARB); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW_ARB); glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, _vboElementID); - glBufferDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, sizeof(GLshort) * 4096 * 6, NULL, GL_STATIC_DRAW_ARB); + glBufferDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, sizeof(GLshort) * HUD_MAX_CHARACTERS * 6, NULL, GL_STATIC_DRAW_ARB); GLshort *idxBufferPtr = (GLshort *)glMapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); - for (size_t i = 0; i < 4096; i++) + for (size_t i = 0; i < HUD_MAX_CHARACTERS; i++) { idxBufferPtr[(i*6)+0] = (i*4)+0; idxBufferPtr[(i*6)+1] = (i*4)+1; @@ -6544,8 +6544,6 @@ } const char *cString = this->_statusString.c_str(); - const size_t bufferSize = length * (2 * 4) * sizeof(GLfloat); - const GLfloat charSize = (GLfloat)this->_glyphSize; const GLfloat lineHeight = charSize * 0.8f; const GLfloat 
textBoxTextOffset = charSize * 0.25f; @@ -6554,7 +6552,7 @@ GLfloat textBoxWidth = 0.0f; glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->_vboVertexID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, bufferSize, NULL, GL_STREAM_DRAW_ARB); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW_ARB); GLfloat *vtxBufferPtr = (GLfloat *)glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); // First, calculate the vertices of the text box. @@ -6664,10 +6662,9 @@ } const char *cString = this->_statusString.c_str(); - const size_t bufferSize = length * (2 * 4) * sizeof(GLfloat); glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->_vboTexCoordID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, bufferSize, NULL, GL_STREAM_DRAW_ARB); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW_ARB); GLfloat *texCoordBufferPtr = (GLfloat *)glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); for (size_t i = 0; i < length; i++) Modified: trunk/desmume/src/cocoa/OGLDisplayOutput.h =================================================================== --- trunk/desmume/src/cocoa/OGLDisplayOutput.h 2016-07-30 01:49:47 UTC (rev 5518) +++ trunk/desmume/src/cocoa/OGLDisplayOutput.h 2016-07-30 01:50:54 UTC (rev 5519) @@ -34,6 +34,8 @@ #include <ft2build.h> #include FT_FREETYPE_H +#define HUD_MAX_CHARACTERS 2048 +#define HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE (sizeof(GLfloat) * HUD_MAX_CHARACTERS * (2 * 4)) #define HUD_TEXTBOX_BASEGLYPHSIZE 64.0 #define HUD_TEXTBOX_BASE_SCALE (1.0/3.0) #define HUD_TEXTBOX_MIN_SCALE 0.70 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-07-30 01:49:50
|
Revision: 5518 http://sourceforge.net/p/desmume/code/5518 Author: rogerman Date: 2016-07-30 01:49:47 +0000 (Sat, 30 Jul 2016) Log Message: ----------- GPU: - Do some minor code cleanup. Modified Paths: -------------- trunk/desmume/src/GPU.cpp Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-30 01:38:14 UTC (rev 5517) +++ trunk/desmume/src/GPU.cpp 2016-07-30 01:49:47 UTC (rev 5518) @@ -6636,8 +6636,8 @@ template<bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineA::_LineLarge8bpp(GPUEngineCompositorInfo &compState) { - u16 XBG = this->_IORegisterMap->BGnOFS[compState.selectedLayerID].BGnHOFS.Offset; - u16 YBG = compState.lineIndexNative + this->_IORegisterMap->BGnOFS[compState.selectedLayerID].BGnVOFS.Offset; + u16 XBG = compState.selectedBGLayer->xOffset; + u16 YBG = compState.lineIndexNative + compState.selectedBGLayer->yOffset; u16 lg = compState.selectedBGLayer->size.width; u16 ht = compState.selectedBGLayer->size.height; u16 wmask = (lg-1); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-07-30 01:38:16
|
Revision: 5517 http://sourceforge.net/p/desmume/code/5517 Author: zeromus Date: 2016-07-30 01:38:14 +0000 (Sat, 30 Jul 2016) Log Message: ----------- fix #1570 better Modified Paths: -------------- trunk/desmume/src/ctrlssdl.cpp Modified: trunk/desmume/src/ctrlssdl.cpp =================================================================== --- trunk/desmume/src/ctrlssdl.cpp 2016-07-28 19:34:12 UTC (rev 5516) +++ trunk/desmume/src/ctrlssdl.cpp 2016-07-30 01:38:14 UTC (rev 5517) @@ -370,7 +370,7 @@ Note: button constants have a 1bit offset. */ case SDL_JOYAXISMOTION: key_code = ((event->jaxis.which & 15) << 12) | JOY_AXIS << 8 | ((event->jaxis.axis & 127) << 1); - if( (u32)(abs(event->jaxis.value) >> 14) != 0 ) + if( ((u32)abs(event->jaxis.value) >> 14) != 0 ) { if (event->jaxis.value > 0) key_code |= 1; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-07-28 19:34:15
|
Revision: 5516 http://sourceforge.net/p/desmume/code/5516 Author: rogerman Date: 2016-07-28 19:34:12 +0000 (Thu, 28 Jul 2016) Log Message: ----------- GPU: - Fix bug where the OBJ layer wasn't doing the window test. Fixes graphical issues in Mario Kart DS. (Regression from r5515. Fixes bug #1572 and #1574.) - The NOWINDOWSENABLEDHINT template parameter is no longer an optional hint; it is now required functionality. It has been renamed to WILLPERFORMWINDOWTEST to reflect this change. Revision Links: -------------- http://sourceforge.net/p/desmume/code/5515 Modified Paths: -------------- trunk/desmume/src/GPU.cpp trunk/desmume/src/GPU.h Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-27 16:58:39 UTC (rev 5515) +++ trunk/desmume/src/GPU.cpp 2016-07-28 19:34:12 UTC (rev 5516) @@ -1667,7 +1667,7 @@ /*****************************************************************************/ // PIXEL RENDERING /*****************************************************************************/ -template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> +template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compState, const u16 srcColor16, const u8 srcAlpha) { u16 &dstColor16 = *compState.lineColorTarget16; @@ -1696,16 +1696,12 @@ return; } - if (!NOWINDOWSENABLEDHINT) + if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestNative[compState.selectedLayerID][compState.xNative] == 0) ) { - const bool didPassWindowTest = (this->_didPassWindowTestNative[compState.selectedLayerID][compState.xNative] != 0); - if (!didPassWindowTest) - { - return; - } + return; } - const bool enableColorEffect = 
(this->_enableColorEffectNative[compState.selectedLayerID][compState.xNative] != 0); + const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[compState.selectedLayerID][compState.xNative] != 0) : true; if (!ISSRCLAYEROBJ && COLOREFFECTDISABLEDHINT) { @@ -1887,7 +1883,7 @@ #ifdef ENABLE_SSE2 -template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(GPUEngineCompositorInfo &compState, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, const __m128i &srcAlpha, @@ -1929,12 +1925,8 @@ __m128i enableColorEffectMask; - if (NOWINDOWSENABLEDHINT) + if (WILLPERFORMWINDOWTEST) { - enableColorEffectMask = _mm_set1_epi8(0xFF); - } - else - { // Do the window test. __m128i didPassWindowTest = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_didPassWindowTestCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); passMask8 = _mm_and_si128(passMask8, didPassWindowTest); @@ -1943,6 +1935,10 @@ enableColorEffectMask = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_enableColorEffectCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); } + else + { + enableColorEffectMask = _mm_set1_epi8(0xFF); + } if ( (!ISSRCLAYEROBJ && COLOREFFECTDISABLEDHINT) || (_mm_movemask_epi8(srcEffectEnableMask) == 0) ) { @@ -1983,7 +1979,7 @@ // Select the color effect based on the BLDCNT target flags. __m128i forceBlendEffectMask = _mm_setzero_si128(); - const __m128i colorEffect_vec128 = (NOWINDOWSENABLEDHINT) ? 
_mm_set1_epi8(compState.colorEffect) : _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(compState.colorEffect), enableColorEffectMask); + const __m128i colorEffect_vec128 = (WILLPERFORMWINDOWTEST) ? _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(compState.colorEffect), enableColorEffectMask) : _mm_set1_epi8(compState.colorEffect); __m128i eva_vec128 = _mm_set1_epi16(compState.blendEVA); __m128i evb_vec128 = _mm_set1_epi16(compState.blendEVB); @@ -2442,7 +2438,7 @@ } } -template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> void GPUEngineBase::_RenderPixelIterate_Final(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { const u16 lineWidth = (ISDEBUGRENDER) ? 
compState.selectedBGLayer->size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; @@ -2489,7 +2485,7 @@ } else { - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, i, srcColor, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, i, srcColor, (index != 0)); } auxX++; @@ -2520,28 +2516,28 @@ } else { - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, i, srcColor, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, i, srcColor, (index != 0)); } } } } -template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> void GPUEngineBase::_RenderPixelIterate_ApplyWrap(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { - this->_RenderPixelIterate_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, WRAP>(compState, param, map, tile, pal); + this->_RenderPixelIterate_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, WRAP>(compState, param, map, tile, pal); } -template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool 
WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> void GPUEngineBase::_RenderPixelIterate(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { if (compState.selectedBGLayer->isDisplayWrapped) { - this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, true>(compState, param, map, tile, pal); + this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, true>(compState, param, map, tile, pal); } else { - this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, false>(compState, param, map, tile, pal); + this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, false>(compState, param, map, tile, pal); } } @@ -2557,7 +2553,7 @@ return theTileEntry; } -template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void GPUEngineBase::_RenderPixelSingle(GPUEngineCompositorInfo &compState, const size_t srcX, u16 srcColor16, const bool opaque) { bool willRenderColor = opaque; @@ -2588,13 +2584,13 @@ if (willRenderColor) { - this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, + this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, srcColor16, 0); } } -template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool 
MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compState) { #ifdef ENABLE_SSE2 @@ -2758,7 +2754,7 @@ dst[3] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 3); } - this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(compState, + this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, true>(compState, src[3], src[2], src[1], src[0], srcAlpha, srcEffectEnableMask, @@ -2788,14 +2784,14 @@ continue; } - this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, + this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, this->_bgLayerColorCustom[compState.xCustom], 0); } } } -template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compState) { const u16 *__restrict srcLine = GPU->GetCustomVRAMAddressUsingMappedAddress(compState.selectedBGLayer->BMPAddress) + compState.blockOffsetCustom; @@ -2860,7 +2856,7 @@ dst[3] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 3); } - this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(compState, + this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, true>(compState, src[3], src[2], src[1], src[0], srcAlpha, srcEffectEnableMask, @@ -2890,7 +2886,7 @@ continue; } - this->_RenderPixel<OUTPUTFORMAT, 
false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, + this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, srcLine[i], 0); } @@ -2900,7 +2896,7 @@ // BACKGROUND RENDERING -TEXT- /*****************************************************************************/ // render a text background to the combined pixelbuffer -template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_RenderLine_BGText(GPUEngineCompositorInfo &compState, const u16 XBG, const u16 YBG) { const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; @@ -2948,7 +2944,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -2967,7 +2963,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -2984,7 +2980,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, 
ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -3007,7 +3003,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -3026,7 +3022,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -3043,7 +3039,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -3088,20 +3084,20 @@ { const u8 index = *tileColorIdx; const u16 color = LE_TO_LOCAL_16(tilePal[index]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } } } } } -template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, 
bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_RenderLine_BGAffine(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param) { - this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_8bit_entry>(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, this->_paletteBG); + this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_8bit_entry>(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, this->_paletteBG); } -template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_RenderLine_BGExtended(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, bool &outUseCustomVRAM) { const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; @@ -3112,17 +3108,17 @@ { if (DISPCNT.ExBGxPalette_Enable) { - this->_RenderPixelIterate< OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry<true> >(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, *(compState.selectedBGLayer->extPalette)); + this->_RenderPixelIterate< OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry<true> >(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, *(compState.selectedBGLayer->extPalette)); } else { - this->_RenderPixelIterate< 
OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry<false> >(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, this->_paletteBG); + this->_RenderPixelIterate< OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry<false> >(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, this->_paletteBG); } break; } case BGType_AffineExt_256x1: // 256 colors - this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_256_map>(compState, param, compState.selectedBGLayer->BMPAddress, 0, this->_paletteBG); + this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_256_map>(compState, param, compState.selectedBGLayer->BMPAddress, 0, this->_paletteBG); break; case BGType_AffineExt_Direct: // direct colors / BMP @@ -3171,7 +3167,7 @@ if (!outUseCustomVRAM) { - this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_BMP_map>(compState, param, compState.selectedBGLayer->BMPAddress, 0, this->_paletteBG); + this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_BMP_map>(compState, param, compState.selectedBGLayer->BMPAddress, 0, this->_paletteBG); } else { @@ -3202,7 +3198,7 @@ } case BGType_Large8bpp: // large screen 256 colors - this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_256_map>(compState, param, compState.selectedBGLayer->largeBMPAddress, 0, this->_paletteBG); + this->_RenderPixelIterate<OUTPUTFORMAT, 
ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_256_map>(compState, param, compState.selectedBGLayer->largeBMPAddress, 0, this->_paletteBG); break; default: @@ -3214,49 +3210,49 @@ // BACKGROUND RENDERING -HELPER FUNCTIONS- /*****************************************************************************/ -template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_LineText(GPUEngineCompositorInfo &compState) { if (ISDEBUGRENDER) { - this->_RenderLine_BGText<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, 0, compState.lineIndexNative); + this->_RenderLine_BGText<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, 0, compState.lineIndexNative); } else { - this->_RenderLine_BGText<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, compState.selectedBGLayer->xOffset, compState.lineIndexNative + compState.selectedBGLayer->yOffset); + this->_RenderLine_BGText<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, compState.selectedBGLayer->xOffset, compState.lineIndexNative + compState.selectedBGLayer->yOffset); } } -template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void 
GPUEngineBase::_LineRot(GPUEngineCompositorInfo &compState) { if (ISDEBUGRENDER) { static const IOREG_BGnParameter debugParams = {256, 0, 0, -77, 0, compState.blockOffsetNative}; - this->_RenderLine_BGAffine<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, debugParams); + this->_RenderLine_BGAffine<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, debugParams); } else { IOREG_BGnParameter *__restrict bgParams = (compState.selectedLayerID == GPULayerID_BG2) ? (IOREG_BGnParameter *)&this->_IORegisterMap->BG2Param : (IOREG_BGnParameter *)&this->_IORegisterMap->BG3Param; - this->_RenderLine_BGAffine<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, *bgParams); + this->_RenderLine_BGAffine<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, *bgParams); bgParams->BGnX.value += bgParams->BGnPB.value; bgParams->BGnY.value += bgParams->BGnPD.value; } } -template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_LineExtRot(GPUEngineCompositorInfo &compState, bool &outUseCustomVRAM) { if (ISDEBUGRENDER) { static const IOREG_BGnParameter debugParams = {256, 0, 0, -77, 0, compState.blockOffsetNative}; - this->_RenderLine_BGExtended<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, debugParams, outUseCustomVRAM); + this->_RenderLine_BGExtended<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, 
ISCUSTOMRENDERINGNEEDED>(compState, debugParams, outUseCustomVRAM); } else { IOREG_BGnParameter *__restrict bgParams = (compState.selectedLayerID == GPULayerID_BG2) ? (IOREG_BGnParameter *)&this->_IORegisterMap->BG2Param : (IOREG_BGnParameter *)&this->_IORegisterMap->BG3Param; - this->_RenderLine_BGExtended<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, *bgParams, outUseCustomVRAM); + this->_RenderLine_BGExtended<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, *bgParams, outUseCustomVRAM); bgParams->BGnX.value += bgParams->BGnPB.value; bgParams->BGnY.value += bgParams->BGnPD.value; @@ -3914,7 +3910,7 @@ } } -template <NDSColorFormat OUTPUTFORMAT, bool NOWINDOWSENABLEDHINT> +template <NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void GPUEngineBase::_RenderLine_Layers(const size_t l) { const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); @@ -3969,7 +3965,7 @@ this->_RenderLine_SetupSprites(compState); } - if (!NOWINDOWSENABLEDHINT) + if (WILLPERFORMWINDOWTEST) { this->_PerformWindowTesting(compState); } @@ -3996,18 +3992,18 @@ { if ( (layerID == GPULayerID_BG0) && GPU->GetEngineMain()->WillRender3DLayer() ) { - GPU->GetEngineMain()->RenderLine_Layer3D<OUTPUTFORMAT, NOWINDOWSENABLEDHINT>(compState); + GPU->GetEngineMain()->RenderLine_Layer3D<OUTPUTFORMAT, WILLPERFORMWINDOWTEST>(compState); continue; } } if (this->isLineRenderNative[compState.lineIndexNative]) { - this->_RenderLine_LayerBG<OUTPUTFORMAT, false, NOWINDOWSENABLEDHINT, false>(compState); + this->_RenderLine_LayerBG<OUTPUTFORMAT, false, WILLPERFORMWINDOWTEST, false>(compState); } else { - this->_RenderLine_LayerBG<OUTPUTFORMAT, false, NOWINDOWSENABLEDHINT, true>(compState); + this->_RenderLine_LayerBG<OUTPUTFORMAT, false, WILLPERFORMWINDOWTEST, true>(compState); } } //layer enabled } @@ -4018,7 +4014,7 @@ { compState.selectedLayerID = 
GPULayerID_OBJ; compState.selectedBGLayer = NULL; - this->_RenderLine_LayerOBJ<OUTPUTFORMAT>(compState, item); + this->_RenderLine_LayerOBJ<OUTPUTFORMAT, WILLPERFORMWINDOWTEST>(compState, item); } } } @@ -4051,7 +4047,7 @@ } } -template <NDSColorFormat OUTPUTFORMAT> +template <NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compState, itemsForPriority_t *__restrict item) { if (this->vramBlockOBJIndex != VRAM_NO_3D_USAGE) @@ -4114,9 +4110,9 @@ compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHead + srcX; compState.lineLayerIDTarget = compState.lineLayerIDHead + srcX; - this->_RenderPixel<OUTPUTFORMAT, true, false, false, false>(compState, - this->_sprColor[srcX], - this->_sprAlpha[srcX]); + this->_RenderPixel<OUTPUTFORMAT, true, false, WILLPERFORMWINDOWTEST, false>(compState, + this->_sprColor[srcX], + this->_sprAlpha[srcX]); } } else @@ -4144,9 +4140,9 @@ compState.lineColorTarget32 = (FragmentColor *)dstColorPtr + dstX; compState.lineLayerIDTarget = dstLayerIDPtr + dstX; - this->_RenderPixel<OUTPUTFORMAT, true, false, false, false>(compState, - (useCustomVRAM) ? srcLine[dstX] : this->_sprColor[srcX], - this->_sprAlpha[srcX]); + this->_RenderPixel<OUTPUTFORMAT, true, false, WILLPERFORMWINDOWTEST, false>(compState, + (useCustomVRAM) ? srcLine[dstX] : this->_sprColor[srcX], + this->_sprAlpha[srcX]); } } @@ -4482,7 +4478,7 @@ // Window 0 has the highest priority, so always check this first. if (this->_WIN0_ENABLED && this->_IsWindowInsideVerticalRange<0>(compState)) { - if (this->_h_win[0][i] == 1) + if (this->_h_win[0][i] != 0) { this->_didPassWindowTestNative[layerID][i] = this->_WIN0_enable[layerID]; this->_enableColorEffectNative[layerID][i] = this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG]; @@ -4493,7 +4489,7 @@ // Window 1 has medium priority, and is checked after Window 0. 
if (this->_WIN1_ENABLED && this->_IsWindowInsideVerticalRange<1>(compState)) { - if (this->_h_win[1][i] == 1) + if (this->_h_win[1][i] != 0) { this->_didPassWindowTestNative[layerID][i] = this->_WIN1_enable[layerID]; this->_enableColorEffectNative[layerID][i] = this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG]; @@ -4504,7 +4500,7 @@ // Window OBJ has low priority, and is checked after both Window 0 and Window 1. if (this->_WINOBJ_ENABLED) { - if (this->_sprWin[i] == 1) + if (this->_sprWin[i] != 0) { this->_didPassWindowTestNative[layerID][i] = this->_WINOBJ_enable[layerID]; this->_enableColorEffectNative[layerID][i] = this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG]; @@ -4561,17 +4557,17 @@ } } -template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compState) { bool useCustomVRAM = false; switch (compState.selectedBGLayer->baseType) { - case BGType_Text: this->_LineText<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState); break; - case BGType_Affine: this->_LineRot<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState); break; - case BGType_AffineExt: this->_LineExtRot<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, useCustomVRAM); break; - case BGType_Large8bpp: this->_LineExtRot<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, useCustomVRAM); break; + case BGType_Text: this->_LineText<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, 
ISCUSTOMRENDERINGNEEDED>(compState); break; + case BGType_Affine: this->_LineRot<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState); break; + case BGType_AffineExt: this->_LineExtRot<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, useCustomVRAM); break; + case BGType_Large8bpp: this->_LineExtRot<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState, useCustomVRAM); break; case BGType_Invalid: PROGINFO("Attempting to render an invalid BG type\n"); break; @@ -4588,35 +4584,35 @@ if (useCustomVRAM) { - this->_RenderPixelsCustomVRAM<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState); + this->_RenderPixelsCustomVRAM<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState); } else { - this->_RenderPixelsCustom<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState); + this->_RenderPixelsCustom<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState); } } } -template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compState) { - this->_RenderLine_LayerBG_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState); + this->_RenderLine_LayerBG_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState); } -template <NDSColorFormat 
OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compState) { if (compState.colorEffect == ColorEffect_Disable) { - this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, true, ISCUSTOMRENDERINGNEEDED>(compState); + this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, true, ISCUSTOMRENDERINGNEEDED>(compState); } else { - this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, false, ISCUSTOMRENDERINGNEEDED>(compState); + this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, false, ISCUSTOMRENDERINGNEEDED>(compState); } } -template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compState) { if (ISDEBUGRENDER) @@ -4628,12 +4624,12 @@ #ifndef DISABLE_MOSAIC if (compState.selectedBGLayer->isMosaic && this->_isBGMosaicSet) { - this->_RenderLine_LayerBG_ApplyMosaic<OUTPUTFORMAT, ISDEBUGRENDER, true, NOWINDOWSENABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState); + this->_RenderLine_LayerBG_ApplyMosaic<OUTPUTFORMAT, ISDEBUGRENDER, true, WILLPERFORMWINDOWTEST, ISCUSTOMRENDERINGNEEDED>(compState); } else #endif { - this->_RenderLine_LayerBG_ApplyMosaic<OUTPUTFORMAT, ISDEBUGRENDER, false, NOWINDOWSENABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState); + this->_RenderLine_LayerBG_ApplyMosaic<OUTPUTFORMAT, ISDEBUGRENDER, false, WILLPERFORMWINDOWTEST, 
ISCUSTOMRENDERINGNEEDED>(compState); } } } @@ -5331,11 +5327,11 @@ { if (this->_isAnyWindowEnabled) { - this->_RenderLine_Layers<OUTPUTFORMAT, false>(l); + this->_RenderLine_Layers<OUTPUTFORMAT, true>(l); } else { - this->_RenderLine_Layers<OUTPUTFORMAT, true>(l); + this->_RenderLine_Layers<OUTPUTFORMAT, false>(l); } } @@ -5377,7 +5373,7 @@ } } -template <NDSColorFormat OUTPUTFORMAT, bool NOWINDOWSENABLEDHINT> +template <NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compState) { const FragmentColor *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer(); @@ -5441,17 +5437,17 @@ __m128i passMask8; __m128i enableColorEffectMask; - if (NOWINDOWSENABLEDHINT) + if (WILLPERFORMWINDOWTEST) { - passMask8 = _mm_set1_epi8(0xFF); - enableColorEffectMask = _mm_set1_epi8(0xFF); - } - else - { // Do the window test. passMask8 = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_didPassWindowTestCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); enableColorEffectMask = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_enableColorEffectCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); } + else + { + passMask8 = _mm_set1_epi8(0xFF); + enableColorEffectMask = _mm_set1_epi8(0xFF); + } // Do the alpha test. Pixels with an alpha value of 0 are rejected. 
passMask8 = _mm_andnot_si128(_mm_cmpeq_epi8(srcAlpha, _mm_setzero_si128()), passMask8); @@ -5509,12 +5505,12 @@ #endif for (; compState.xCustom < compState.lineWidthCustom; srcLinePtr++, compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) { - if ( (srcLinePtr->a == 0) || (!NOWINDOWSENABLEDHINT && (this->_didPassWindowTestCustom[compState.selectedLayerID][compState.xCustom] != 0)) ) + if ( (srcLinePtr->a == 0) || (WILLPERFORMWINDOWTEST && (this->_didPassWindowTestCustom[compState.selectedLayerID][compState.xCustom] == 0)) ) { continue; } - const bool enableColorEffect = (NOWINDOWSENABLEDHINT) ? true : (this->_enableColorEffectCustom[compState.selectedLayerID][compState.xCustom] != 0); + const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectCustom[compState.selectedLayerID][compState.xCustom] != 0) : true; this->_RenderPixel3D<OUTPUTFORMAT>(compState, enableColorEffect, @@ -5528,7 +5524,7 @@ { for (compState.xNative = 0, compState.xCustom = 0; compState.xCustom < compState.lineWidthCustom; compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) { - if ( !NOWINDOWSENABLEDHINT && (this->_didPassWindowTestCustom[compState.selectedLayerID][compState.xCustom] == 0) ) + if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestCustom[compState.selectedLayerID][compState.xCustom] == 0) ) { continue; } @@ -5545,7 +5541,7 @@ } compState.xNative = _gpuDstToSrcIndex[compState.xCustom]; - const bool enableColorEffect = (NOWINDOWSENABLEDHINT) ? true : (this->_enableColorEffectCustom[compState.selectedLayerID][compState.xCustom] != 0); + const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? 
(this->_enableColorEffectCustom[compState.selectedLayerID][compState.xCustom] != 0) : true; this->_RenderPixel3D<OUTPUTFORMAT>(compState, enableColorEffect, @@ -6637,7 +6633,7 @@ } } -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template<bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineA::_LineLarge8bpp(GPUEngineCompositorInfo &compState) { u16 XBG = this->_IORegisterMap->BGnOFS[compState.selectedLayerID].BGnHOFS.Offset; @@ -6666,7 +6662,7 @@ { const u8 index = map[XBG]; const u16 color = LE_TO_LOCAL_16(this->_paletteBG[index]); - this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (color != 0)); + this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT>(compState, x, color, (color != 0)); } } } @@ -6745,11 +6741,11 @@ { if (this->_isAnyWindowEnabled) { - this->_RenderLine_Layers<OUTPUTFORMAT, false>(l); + this->_RenderLine_Layers<OUTPUTFORMAT, true>(l); } else { - this->_RenderLine_Layers<OUTPUTFORMAT, true>(l); + this->_RenderLine_Layers<OUTPUTFORMAT, false>(l); } this->_HandleDisplayModeNormal<OUTPUTFORMAT>(l); Modified: trunk/desmume/src/GPU.h =================================================================== --- trunk/desmume/src/GPU.h 2016-07-27 16:58:39 UTC (rev 5515) +++ trunk/desmume/src/GPU.h 2016-07-28 19:34:12 UTC (rev 5516) @@ -1373,26 +1373,26 @@ void _MosaicSpriteLinePixel(GPUEngineCompositorInfo &compState, const size_t x, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab); void _MosaicSpriteLine(GPUEngineCompositorInfo &compState, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool 
COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> void _RenderPixelIterate_Final(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> void _RenderPixelIterate_ApplyWrap(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> void _RenderPixelIterate(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> void _RenderPixelIterate_Final(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> void _RenderPixelIterate_ApplyWrap(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> void _RenderPixelIterate(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 
*__restrict pal); TILEENTRY _GetTileEntry(const u32 tileMapAddress, const u16 xOffset, const u16 layerWidthMask); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void _RenderPixelSingle(GPUEngineCompositorInfo &compState, const size_t srcX, u16 srcColor16, const bool opaque); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> void _RenderPixelsCustom(GPUEngineCompositorInfo &compState); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> void _RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compState); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void _RenderPixelSingle(GPUEngineCompositorInfo &compState, const size_t srcX, u16 srcColor16, const bool opaque); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> void _RenderPixelsCustom(GPUEngineCompositorInfo &compState); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> void _RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compState); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_BGText(GPUEngineCompositorInfo &compState, const u16 XBG, const u16 YBG); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_BGAffine(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool 
NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_BGExtended(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, bool &outUseCustomVRAM); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_BGText(GPUEngineCompositorInfo &compState, const u16 XBG, const u16 YBG); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_BGAffine(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_BGExtended(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, bool &outUseCustomVRAM); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _LineText(GPUEngineCompositorInfo &compState); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _LineRot(GPUEngineCompositorInfo &compState); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _LineExtRot(GPUEngineCompositorInfo &compState, bool &outUseCustomVRAM); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _LineText(GPUEngineCompositorInfo &compState); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool 
ISCUSTOMRENDERINGNEEDED> void _LineRot(GPUEngineCompositorInfo &compState); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _LineExtRot(GPUEngineCompositorInfo &compState, bool &outUseCustomVRAM); template<NDSColorFormat OUTPUTFORMAT> void _RenderLine_Clear(GPUEngineCompositorInfo &compState); void _RenderLine_SetupSprites(GPUEngineCompositorInfo &compState); - template<NDSColorFormat OUTPUTFORMAT, bool NOWINDOWSENABLEDHINT> void _RenderLine_Layers(const size_t l); + template<NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _RenderLine_Layers(const size_t l); template<NDSColorFormat OUTPUTFORMAT> void _HandleDisplayModeOff(const size_t l); template<NDSColorFormat OUTPUTFORMAT> void _HandleDisplayModeNormal(const size_t l); @@ -1401,14 +1401,14 @@ template<size_t WIN_NUM> bool _IsWindowInsideVerticalRange(GPUEngineCompositorInfo &compState); void _PerformWindowTesting(GPUEngineCompositorInfo &compState); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compState); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compState); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compState); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG(GPUEngineCompositorInfo &compState); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool 
WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compState); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compState); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compState); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG(GPUEngineCompositorInfo &compState); - template<NDSColorFormat OUTPUTFORMAT> void _RenderLine_LayerOBJ(GPUEngineCompositorInfo &compState, itemsForPriority_t *__restrict item); + template<NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _RenderLine_LayerOBJ(GPUEngineCompositorInfo &compState, itemsForPriority_t *__restrict item); - template<NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void _RenderPixel(GPUEngineCompositorInfo &compState, const u16 srcColor16, const u8 srcAlpha); + template<NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void _RenderPixel(GPUEngineCompositorInfo &compState, const u16 srcColor16, const u8 srcAlpha); template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _RenderPixel3D(GPUEngineCompositorInfo &compState, const bool enableColorEffect, const FragmentColor srcColor32); FORCEINLINE u16 _ColorEffectBlend(const u16 colA, const u16 colB, const u16 blendEVA, const u16 blendEVB); @@ -1430,7 +1430,7 @@ template<NDSColorFormat COLORFORMAT> FORCEINLINE __m128i _ColorEffectIncreaseBrightness(const __m128i &col, const 
__m128i &blendEVY); template<NDSColorFormat COLORFORMAT> FORCEINLINE __m128i _ColorEffectDecreaseBrightness(const __m128i &col, const __m128i &blendEVY); template<bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderPixel_CheckWindows16_SSE2(GPUEngineCompositorInfo &compState, const size_t dstX, __m128i &didPassWindowTest, __m128i &enableColorEffect) const; - template<NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderPixel16_SSE2(GPUEngineCompositorInfo &compState, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, const __m128i &srcAlpha, const __m128i &srcEffectEnableMask, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID, __m128i &passMask8); + template<NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderPixel16_SSE2(GPUEngineCompositorInfo &compState, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, const __m128i &srcAlpha, const __m128i &srcEffectEnableMask, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID, __m128i &passMask8); template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _RenderPixel3D_SSE2(GPUEngineCompositorInfo &compState, const __m128i &passMask8, const __m128i &enableColorEffectMask, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID); #endif @@ -1543,7 +1543,7 @@ DISPCAPCNT_parsed _dispCapCnt; - template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _LineLarge8bpp(GPUEngineCompositorInfo &compState); + template<bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, 
bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _LineLarge8bpp(GPUEngineCompositorInfo &compState); template<NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH> void _RenderLine_DisplayCapture(const u16 l); void _RenderLine_DispCapture_FIFOToBuffer(u16 *fifoLineBuffer); @@ -1590,7 +1590,7 @@ virtual void Reset(); template<NDSColorFormat OUTPUTFORMAT> void RenderLine(const u16 l); - template<NDSColorFormat OUTPUTFORMAT, bool NOWINDOWSENABLEDHINT> void RenderLine_Layer3D(GPUEngineCompositorInfo &compState); + template<NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void RenderLine_Layer3D(GPUEngineCompositorInfo &compState); }; class GPUEngineB : public GPUEngineBase This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-07-27 16:58:41
|
Revision: 5515 http://sourceforge.net/p/desmume/code/5515 Author: rogerman Date: 2016-07-27 16:58:39 +0000 (Wed, 27 Jul 2016) Log Message: ----------- GPU: - Window testing is now a per-scanline operation instead of a per-pixel operation. Removes the performance penalty of window testing at larger framebuffer sizes. Modified Paths: -------------- trunk/desmume/src/GPU.cpp trunk/desmume/src/GPU.h Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-26 17:17:36 UTC (rev 5514) +++ trunk/desmume/src/GPU.cpp 2016-07-27 16:58:39 UTC (rev 5515) @@ -125,8 +125,9 @@ static size_t _gpuVRAMBlockOffset = GPU_VRAM_BLOCK_LINES * GPU_FRAMEBUFFER_NATIVE_WIDTH; static u16 *_gpuDstToSrcIndex = NULL; // Key: Destination pixel index / Value: Source pixel index -static u8 *_gpuDstToSrcSSSE3_u8 = NULL; -static u8 *_gpuDstToSrcSSSE3_u16 = NULL; +static u8 *_gpuDstToSrcSSSE3_u8_8e = NULL; +static u8 *_gpuDstToSrcSSSE3_u8_16e = NULL; +static u8 *_gpuDstToSrcSSSE3_u16_8e = NULL; static CACHE_ALIGN size_t _gpuDstPitchCount[GPU_FRAMEBUFFER_NATIVE_WIDTH]; // Key: Source pixel index in x-dimension / Value: Number of x-dimension destination pixels for the source pixel static CACHE_ALIGN size_t _gpuDstPitchIndex[GPU_FRAMEBUFFER_NATIVE_WIDTH]; // Key: Source pixel index in x-dimension / Value: First destination pixel that maps to the source pixel @@ -166,12 +167,37 @@ {{128,128}, {256,256}, {512,256}, {512,512}}, //affine ext direct }; -const CACHE_ALIGN u8 GPUEngineBase::_winEmpty[GPU_FRAMEBUFFER_NATIVE_WIDTH] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; +static void ExpandLine8(u8 *__restrict dst, const u8 *__restrict src, size_t dstLength) +{ +#ifdef ENABLE_SSSE3 + const bool isIntegerScale = ((dstLength % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0); + if (isIntegerScale) + { + const size_t scale = dstLength / GPU_FRAMEBUFFER_NATIVE_WIDTH; + + for (size_t srcX = 0, dstX = 0; srcX < GPU_FRAMEBUFFER_NATIVE_WIDTH; srcX+=16, dstX+=(scale*16)) + { + const __m128i src_vec128 = _mm_load_si128((__m128i *)(src + srcX)); + + for (size_t s = 0; s < scale; s++) + { + const __m128i ssse3idx_u8 = _mm_load_si128((__m128i *)(_gpuDstToSrcSSSE3_u8_16e + (s * 16))); + _mm_store_si128( (__m128i *)(dst + dstX + (s * 16)), _mm_shuffle_epi8(src_vec128, ssse3idx_u8) ); + } + } + } + else +#endif + { + for (size_t x = 0; x < GPU_FRAMEBUFFER_NATIVE_WIDTH; x++) + { + for (size_t p = 0; p < _gpuDstPitchCount[x]; p++) + { + dst[_gpuDstPitchIndex[x] + p] = src[x]; + } + } + } +} /*****************************************************************************/ // BACKGROUND RENDERING -ROTOSCALE- @@ -375,6 +401,20 @@ _renderLineLayerIDCustom = NULL; _bgLayerIndexCustom = NULL; _bgLayerColorCustom = NULL; + + _didPassWindowTestCustomMasterPtr = NULL; + _didPassWindowTestCustom[GPULayerID_BG0] = NULL; + _didPassWindowTestCustom[GPULayerID_BG1] = NULL; + _didPassWindowTestCustom[GPULayerID_BG2] = NULL; + _didPassWindowTestCustom[GPULayerID_BG3] = NULL; + _didPassWindowTestCustom[GPULayerID_OBJ] = NULL; + + _enableColorEffectCustomMasterPtr = NULL; + _enableColorEffectCustom[GPULayerID_BG0] = NULL; + _enableColorEffectCustom[GPULayerID_BG1] = NULL; + _enableColorEffectCustom[GPULayerID_BG2] = NULL; + _enableColorEffectCustom[GPULayerID_BG3] = NULL; + _enableColorEffectCustom[GPULayerID_OBJ] = NULL; } GPUEngineBase::~GPUEngineBase() @@ -387,6 +427,21 @@ this->_bgLayerIndexCustom = NULL; free_aligned(this->_bgLayerColorCustom); 
this->_bgLayerColorCustom = NULL; + + free_aligned(this->_didPassWindowTestCustomMasterPtr); + this->_didPassWindowTestCustomMasterPtr = NULL; + this->_didPassWindowTestCustom[GPULayerID_BG0] = NULL; + this->_didPassWindowTestCustom[GPULayerID_BG1] = NULL; + this->_didPassWindowTestCustom[GPULayerID_BG2] = NULL; + this->_didPassWindowTestCustom[GPULayerID_BG3] = NULL; + this->_didPassWindowTestCustom[GPULayerID_OBJ] = NULL; + + this->_enableColorEffectCustomMasterPtr = NULL; + this->_enableColorEffectCustom[GPULayerID_BG0] = NULL; + this->_enableColorEffectCustom[GPULayerID_BG1] = NULL; + this->_enableColorEffectCustom[GPULayerID_BG2] = NULL; + this->_enableColorEffectCustom[GPULayerID_BG3] = NULL; + this->_enableColorEffectCustom[GPULayerID_OBJ] = NULL; } void GPUEngineBase::_Reset_Base() @@ -399,6 +454,10 @@ memset(this->_sprPrio, 0x7F, sizeof(this->_sprPrio)); memset(this->_sprNum, 0, sizeof(this->_sprNum)); + memset(this->_didPassWindowTestNative, 1, 5 * GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u8)); + memset(this->_enableColorEffectNative, 1, 5 * GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u8)); + memset(this->_didPassWindowTestCustomMasterPtr, 1, 10 * dispInfo.customWidth * sizeof(u8)); + memset(this->_h_win[0], 0, sizeof(this->_h_win[0])); memset(this->_h_win[1], 0, sizeof(this->_h_win[1])); memset(&this->_mosaicColors, 0, sizeof(MosaicColor)); @@ -478,8 +537,6 @@ this->_isBGMosaicSet = false; this->_isOBJMosaicSet = false; - this->_curr_win[0] = GPUEngineBase::_winEmpty; - this->_curr_win[1] = GPUEngineBase::_winEmpty; this->_needUpdateWINH[0] = true; this->_needUpdateWINH[1] = true; @@ -501,20 +558,7 @@ this->_WIN1_ENABLED = false; this->_WINOBJ_ENABLED = false; this->_isAnyWindowEnabled = false; - this->_isWindowInsideVerticalRange[0] = false; - this->_isWindowInsideVerticalRange[1] = false; -#ifdef ENABLE_SSE2 - this->_windowLeftCustom[0] = 0; - this->_windowLeftCustom[1] = 0; - this->_windowRightCustom[0] = 0; - this->_windowRightCustom[1] = 0; - 
this->_windowLeftCustom_SSE2[0] = _mm_setzero_si128(); - this->_windowLeftCustom_SSE2[1] = _mm_setzero_si128(); - this->_windowRightCustom_SSE2[0] = _mm_setzero_si128(); - this->_windowRightCustom_SSE2[1] = _mm_setzero_si128(); -#endif - this->_BLDALPHA_EVA = 0; this->_BLDALPHA_EVB = 0; this->_BLDALPHA_EVY = 0; @@ -559,33 +603,33 @@ #endif #endif - this->_WIN0_enable[GPULayerID_BG0] = false; - this->_WIN0_enable[GPULayerID_BG1] = false; - this->_WIN0_enable[GPULayerID_BG2] = false; - this->_WIN0_enable[GPULayerID_BG3] = false; - this->_WIN0_enable[GPULayerID_OBJ] = false; - this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG] = false; + this->_WIN0_enable[GPULayerID_BG0] = 0; + this->_WIN0_enable[GPULayerID_BG1] = 0; + this->_WIN0_enable[GPULayerID_BG2] = 0; + this->_WIN0_enable[GPULayerID_BG3] = 0; + this->_WIN0_enable[GPULayerID_OBJ] = 0; + this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG] = 0; - this->_WIN1_enable[GPULayerID_BG0] = false; - this->_WIN1_enable[GPULayerID_BG1] = false; - this->_WIN1_enable[GPULayerID_BG2] = false; - this->_WIN1_enable[GPULayerID_BG3] = false; - this->_WIN1_enable[GPULayerID_OBJ] = false; - this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG] = false; + this->_WIN1_enable[GPULayerID_BG0] = 0; + this->_WIN1_enable[GPULayerID_BG1] = 0; + this->_WIN1_enable[GPULayerID_BG2] = 0; + this->_WIN1_enable[GPULayerID_BG3] = 0; + this->_WIN1_enable[GPULayerID_OBJ] = 0; + this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG] = 0; - this->_WINOUT_enable[GPULayerID_BG0] = false; - this->_WINOUT_enable[GPULayerID_BG1] = false; - this->_WINOUT_enable[GPULayerID_BG2] = false; - this->_WINOUT_enable[GPULayerID_BG3] = false; - this->_WINOUT_enable[GPULayerID_OBJ] = false; - this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG] = false; + this->_WINOUT_enable[GPULayerID_BG0] = 0; + this->_WINOUT_enable[GPULayerID_BG1] = 0; + this->_WINOUT_enable[GPULayerID_BG2] = 0; + this->_WINOUT_enable[GPULayerID_BG3] = 0; + this->_WINOUT_enable[GPULayerID_OBJ] = 0; + 
this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG] = 0; - this->_WINOBJ_enable[GPULayerID_BG0] = false; - this->_WINOBJ_enable[GPULayerID_BG1] = false; - this->_WINOBJ_enable[GPULayerID_BG2] = false; - this->_WINOBJ_enable[GPULayerID_BG3] = false; - this->_WINOBJ_enable[GPULayerID_OBJ] = false; - this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG] = false; + this->_WINOBJ_enable[GPULayerID_BG0] = 0; + this->_WINOBJ_enable[GPULayerID_BG1] = 0; + this->_WINOBJ_enable[GPULayerID_BG2] = 0; + this->_WINOBJ_enable[GPULayerID_BG3] = 0; + this->_WINOBJ_enable[GPULayerID_OBJ] = 0; + this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG] = 0; #if defined(ENABLE_SSE2) this->_WIN0_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); @@ -1609,218 +1653,18 @@ const size_t lineWidth = GPU->GetDisplayInfo().customWidth; const size_t lineCount = _gpuDstLineCount[l]; - u8 *dstLinePtr = dstBuffer; - u8 *dst = dstLinePtr; - const u8 *src = srcBuffer; + ExpandLine8(dstBuffer, srcBuffer, lineWidth); - for (size_t x = 0; x < GPU_FRAMEBUFFER_NATIVE_WIDTH; x++) - { - for (size_t p = 0; p < _gpuDstPitchCount[x]; p++) - { - dst[_gpuDstPitchIndex[x] + p] = src[x]; - } - } - - dst = dstLinePtr + lineWidth; - + u8 *__restrict dstLineInc = dstBuffer + lineWidth; for (size_t line = 1; line < lineCount; line++) { - memcpy(dst, dstLinePtr, lineWidth * sizeof(u8)); - dst += lineWidth; + memcpy(dstLineInc, dstBuffer, lineWidth * sizeof(u8)); + dstLineInc += lineWidth; } } } /*****************************************************************************/ -// ROUTINES FOR INSIDE / OUTSIDE WINDOW CHECKS -/*****************************************************************************/ - -FORCEINLINE void GPUEngineBase::_RenderPixel_CheckWindows(GPUEngineCompositorInfo &compState, bool &didPassWindowTest, bool &enableColorEffect) const -{ - didPassWindowTest = true; - enableColorEffect = true; - - // If no windows are enabled, then we don't need to perform any window tests. 
- // In this case, the pixel always passes and the color effect is always processed. - if (!this->_isAnyWindowEnabled) - { - return; - } - - // Window 0 has the highest priority, so always check this first. - if (this->_WIN0_ENABLED) - { - if (this->_curr_win[0][compState.xNative] == 1) - { - //INFO("bg%i passed win0 : (%i %i) was within (%i %i)(%i %i)\n", bgnum, x, gpu->_currentScanline, gpu->WIN0H0, gpu->WIN0V0, gpu->WIN0H1, gpu->WIN0V1); - didPassWindowTest = this->_WIN0_enable[compState.selectedLayerID]; - enableColorEffect = this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG]; - return; - } - } - - // Window 1 has medium priority, and is checked after Window 0. - if (this->_WIN1_ENABLED) - { - if (this->_curr_win[1][compState.xNative] == 1) - { - //INFO("bg%i passed win1 : (%i %i) was within (%i %i)(%i %i)\n", bgnum, x, gpu->_currentScanline, gpu->WIN1H0, gpu->WIN1V0, gpu->WIN1H1, gpu->WIN1V1); - didPassWindowTest = this->_WIN1_enable[compState.selectedLayerID]; - enableColorEffect = this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG]; - return; - } - } - - // Window OBJ has low priority, and is checked after both Window 0 and Window 1. - if (this->_WINOBJ_ENABLED) - { - if (this->_sprWin[compState.xNative] == 1) - { - didPassWindowTest = this->_WINOBJ_enable[compState.selectedLayerID]; - enableColorEffect = this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG]; - return; - } - } - - // If the pixel isn't inside any windows, then the pixel is outside, and therefore uses the WINOUT flags. - // This has the lowest priority, and is always checked last. 
- didPassWindowTest = this->_WINOUT_enable[compState.selectedLayerID]; - enableColorEffect = this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG]; -} - -#ifdef ENABLE_SSE2 - -template <bool ISCUSTOMRENDERINGNEEDED> -FORCEINLINE void GPUEngineBase::_RenderPixel_CheckWindows16_SSE2(GPUEngineCompositorInfo &compState, const size_t dstX, __m128i &didPassWindowTest, __m128i &enableColorEffect) const -{ - didPassWindowTest = _mm_set1_epi8(0xFF); - enableColorEffect = _mm_set1_epi8(0xFF); - - // If no windows are enabled, then we don't need to perform any window tests. - // In this case, the pixel always passes and the color effect is always processed. - if (!this->_isAnyWindowEnabled) - { - return; - } - - __m128i win_vec128; - - __m128i win0HandledMask = _mm_setzero_si128(); - __m128i win1HandledMask = _mm_setzero_si128(); - __m128i winOBJHandledMask = _mm_setzero_si128(); - __m128i winOUTHandledMask = _mm_setzero_si128(); - - const __m128i dstLocLo = _mm_adds_epu16( _mm_set1_epi16(dstX), _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0) ); - const __m128i dstLocHi = _mm_adds_epu16( _mm_set1_epi16(dstX), _mm_set_epi16(15, 14 ,13, 12, 11, 10, 9, 8) ); - - // Window 0 has the highest priority, so always check this first. 
- if (this->_WIN0_ENABLED) - { - if (this->_isWindowInsideVerticalRange[0]) - { - if (ISCUSTOMRENDERINGNEEDED) - { - if (this->_windowLeftCustom[0] > this->_windowRightCustom[0]) - { - win_vec128 = _mm_packs_epi16( _mm_and_si128(_mm_cmplt_epi16(dstLocLo, this->_windowLeftCustom_SSE2[0]), _mm_cmpgt_epi16(dstLocLo, this->_windowRightCustom_SSE2[0])), - _mm_and_si128(_mm_cmplt_epi16(dstLocHi, this->_windowLeftCustom_SSE2[0]), _mm_cmpgt_epi16(dstLocHi, this->_windowRightCustom_SSE2[0])) ); - win_vec128 = _mm_xor_si128(win_vec128, _mm_set1_epi32(0xFFFFFFFF)); - } - else - { - win_vec128 = _mm_packs_epi16( _mm_andnot_si128(_mm_cmplt_epi16(dstLocLo, this->_windowLeftCustom_SSE2[0]), _mm_cmplt_epi16(dstLocLo, this->_windowRightCustom_SSE2[0])), - _mm_andnot_si128(_mm_cmplt_epi16(dstLocHi, this->_windowLeftCustom_SSE2[0]), _mm_cmplt_epi16(dstLocHi, this->_windowRightCustom_SSE2[0])) ); - } - - win0HandledMask = win_vec128; - } - else - { - win_vec128 = _mm_loadu_si128((__m128i *)(this->_curr_win[0] + dstX)); - win0HandledMask = _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1)); - } - - didPassWindowTest = _mm_and_si128(win0HandledMask, this->_WIN0_enable_SSE2[compState.selectedLayerID]); - enableColorEffect = _mm_and_si128(win0HandledMask, this->_WIN0_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]); - } - else - { - didPassWindowTest = _mm_setzero_si128(); - enableColorEffect = _mm_setzero_si128(); - } - } - - // Window 1 has medium priority, and is checked after Window 0. 
- if (this->_WIN1_ENABLED && this->_isWindowInsideVerticalRange[1]) - { - if (ISCUSTOMRENDERINGNEEDED) - { - if (this->_windowLeftCustom[1] > this->_windowRightCustom[1]) - { - win_vec128 = _mm_packs_epi16( _mm_and_si128(_mm_cmplt_epi16(dstLocLo, this->_windowLeftCustom_SSE2[1]), _mm_cmpgt_epi16(dstLocLo, this->_windowRightCustom_SSE2[1])), - _mm_and_si128(_mm_cmplt_epi16(dstLocHi, this->_windowLeftCustom_SSE2[1]), _mm_cmpgt_epi16(dstLocHi, this->_windowRightCustom_SSE2[1])) ); - win_vec128 = _mm_xor_si128(win_vec128, _mm_set1_epi32(0xFFFFFFFF)); - } - else - { - win_vec128 = _mm_packs_epi16( _mm_andnot_si128(_mm_cmplt_epi16(dstLocLo, this->_windowLeftCustom_SSE2[1]), _mm_cmplt_epi16(dstLocLo, this->_windowRightCustom_SSE2[1])), - _mm_andnot_si128(_mm_cmplt_epi16(dstLocHi, this->_windowLeftCustom_SSE2[1]), _mm_cmplt_epi16(dstLocHi, this->_windowRightCustom_SSE2[1])) ); - } - - win1HandledMask = _mm_andnot_si128(win0HandledMask, win_vec128); - } - else - { - win_vec128 = _mm_loadu_si128((__m128i *)(this->_curr_win[1] + dstX)); - win1HandledMask = _mm_andnot_si128(win0HandledMask, _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1))); - } - - didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(win1HandledMask, this->_WIN1_enable_SSE2[compState.selectedLayerID]) ); - enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(win1HandledMask, this->_WIN1_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); - } - - // Window OBJ has low priority, and is checked after both Window 0 and Window 1. 
- if (this->_WINOBJ_ENABLED) - { - if (ISCUSTOMRENDERINGNEEDED) - { - win_vec128 = _mm_set_epi8(this->_sprWin[_gpuDstToSrcIndex[dstX+15]], - this->_sprWin[_gpuDstToSrcIndex[dstX+14]], - this->_sprWin[_gpuDstToSrcIndex[dstX+13]], - this->_sprWin[_gpuDstToSrcIndex[dstX+12]], - this->_sprWin[_gpuDstToSrcIndex[dstX+11]], - this->_sprWin[_gpuDstToSrcIndex[dstX+10]], - this->_sprWin[_gpuDstToSrcIndex[dstX+ 9]], - this->_sprWin[_gpuDstToSrcIndex[dstX+ 8]], - this->_sprWin[_gpuDstToSrcIndex[dstX+ 7]], - this->_sprWin[_gpuDstToSrcIndex[dstX+ 6]], - this->_sprWin[_gpuDstToSrcIndex[dstX+ 5]], - this->_sprWin[_gpuDstToSrcIndex[dstX+ 4]], - this->_sprWin[_gpuDstToSrcIndex[dstX+ 3]], - this->_sprWin[_gpuDstToSrcIndex[dstX+ 2]], - this->_sprWin[_gpuDstToSrcIndex[dstX+ 1]], - this->_sprWin[_gpuDstToSrcIndex[dstX+ 0]]); - } - else - { - win_vec128 = _mm_loadu_si128((__m128i *)(this->_sprWin + dstX)); - } - - winOBJHandledMask = _mm_andnot_si128( _mm_or_si128(win0HandledMask, win1HandledMask), _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1)) ); - didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(winOBJHandledMask, this->_WINOBJ_enable_SSE2[compState.selectedLayerID]) ); - enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(winOBJHandledMask, this->_WINOBJ_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); - } - - // If the pixel isn't inside any windows, then the pixel is outside, and therefore uses the WINOUT flags. - // This has the lowest priority, and is always checked last. 
- winOUTHandledMask = _mm_xor_si128( _mm_or_si128(win0HandledMask, _mm_or_si128(win1HandledMask, winOBJHandledMask)), _mm_set1_epi32(0xFFFFFFFF) ); - didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(winOUTHandledMask, this->_WINOUT_enable_SSE2[compState.selectedLayerID]) ); - enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(winOUTHandledMask, this->_WINOUT_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); -} - -#endif - -/*****************************************************************************/ // PIXEL RENDERING /*****************************************************************************/ template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> @@ -1852,19 +1696,17 @@ return; } - bool enableColorEffect = true; - if (!NOWINDOWSENABLEDHINT) { - bool didPassWindowTest; - this->_RenderPixel_CheckWindows(compState, didPassWindowTest, enableColorEffect); - + const bool didPassWindowTest = (this->_didPassWindowTestNative[compState.selectedLayerID][compState.xNative] != 0); if (!didPassWindowTest) { return; } } + const bool enableColorEffect = (this->_enableColorEffectNative[compState.selectedLayerID][compState.xNative] != 0); + if (!ISSRCLAYEROBJ && COLOREFFECTDISABLEDHINT) { switch (OUTPUTFORMAT) @@ -2047,7 +1889,6 @@ template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(GPUEngineCompositorInfo &compState, - const size_t dstX, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, const __m128i &srcAlpha, const __m128i &srcEffectEnableMask, @@ -2086,17 +1927,21 @@ return; } - __m128i enableColorEffectMask = _mm_set1_epi8(0xFF); + __m128i enableColorEffectMask; - if (!NOWINDOWSENABLEDHINT) + if (NOWINDOWSENABLEDHINT) { + enableColorEffectMask = _mm_set1_epi8(0xFF); + } 
+ else + { // Do the window test. - __m128i didPassWindowTest; - this->_RenderPixel_CheckWindows16_SSE2<ISCUSTOMRENDERINGNEEDED>(compState, dstX, didPassWindowTest, enableColorEffectMask); - + __m128i didPassWindowTest = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_didPassWindowTestCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); passMask8 = _mm_and_si128(passMask8, didPassWindowTest); passMask16[0] = _mm_unpacklo_epi8(passMask8, passMask8); passMask16[1] = _mm_unpackhi_epi8(passMask8, passMask8); + + enableColorEffectMask = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_enableColorEffectCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); } if ( (!ISSRCLAYEROBJ && COLOREFFECTDISABLEDHINT) || (_mm_movemask_epi8(srcEffectEnableMask) == 0) ) @@ -2146,7 +1991,7 @@ if (ISSRCLAYEROBJ) { - const __m128i objMode_vec128 = _mm_loadu_si128((__m128i *)(this->_sprType + dstX)); + const __m128i objMode_vec128 = _mm_loadu_si128((__m128i *)(this->_sprType + compState.xNative)); const __m128i isObjTranslucentMask = _mm_and_si128( dstEffectEnableMask, _mm_or_si128(_mm_cmpeq_epi8(objMode_vec128, _mm_set1_epi8(OBJMode_Transparent)), _mm_cmpeq_epi8(objMode_vec128, _mm_set1_epi8(OBJMode_Bitmap))) ); forceBlendEffectMask = isObjTranslucentMask; @@ -2752,13 +2597,11 @@ template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compState) { - const size_t lineWidth = GPU->GetDisplayInfo().customWidth; - #ifdef ENABLE_SSE2 #ifdef ENABLE_SSSE3 - const bool isIntegerScale = ((lineWidth % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0); - const size_t scale = lineWidth / GPU_FRAMEBUFFER_NATIVE_WIDTH; + const bool isIntegerScale = ((compState.lineWidthCustom % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0); + const size_t scale = compState.lineWidthCustom / GPU_FRAMEBUFFER_NATIVE_WIDTH; #endif for (size_t x = 0, dstIdx = 0; x 
< GPU_FRAMEBUFFER_NATIVE_WIDTH; x+=8) @@ -2798,8 +2641,8 @@ for (size_t s = 0; s < scale; s++) { - const __m128i ssse3idx_u8 = _mm_loadl_epi64((__m128i *)(_gpuDstToSrcSSSE3_u8 + (s * 8))); - const __m128i ssse3idx_u16 = _mm_load_si128((__m128i *)(_gpuDstToSrcSSSE3_u16 + (s * 16))); + const __m128i ssse3idx_u8 = _mm_loadl_epi64((__m128i *)(_gpuDstToSrcSSSE3_u8_8e + (s * 8))); + const __m128i ssse3idx_u16 = _mm_load_si128((__m128i *)(_gpuDstToSrcSSSE3_u16_8e + (s * 16))); _mm_storel_epi64( (__m128i *)(this->_bgLayerIndexCustom + dstIdx + (s * 8)), _mm_shuffle_epi8(index_vec128, ssse3idx_u8) ); _mm_store_si128( (__m128i *)(this->_bgLayerColorCustom + dstIdx + (s * 8)), _mm_shuffle_epi8(col_vec128, ssse3idx_u16) ); @@ -2852,37 +2695,36 @@ } #endif - compState.xNative = 0; - compState.xCustom = 0; compState.lineColorTarget16 = (u16 *)compState.lineColorHead; compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHead; compState.lineLayerIDTarget = compState.lineLayerIDHead; #ifdef ENABLE_SSE2 - const size_t ssePixCount = (lineWidth - (lineWidth % 16)); + const size_t ssePixCount = (compState.lineWidthCustom - (compState.lineWidthCustom % 16)); const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[compState.selectedLayerID]; #endif for (size_t l = 0; l < compState.lineRenderCount; l++) { - size_t i = 0; + compState.xNative = 0; + compState.xCustom = 0; #ifdef ENABLE_SSE2 - for (; i < ssePixCount; i+=16, compState.lineColorTarget16+=16, compState.lineColorTarget32+=16, compState.lineLayerIDTarget+=16) + for (; compState.xCustom < ssePixCount; compState.xCustom+=16, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16+=16, compState.lineColorTarget32+=16, compState.lineLayerIDTarget+=16) { __m128i src[4]; if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { - src[0] = _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + i + 0)); - src[1] = _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + i + 8)); + src[0] = 
_mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compState.xCustom + 0)); + src[1] = _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compState.xCustom + 8)); src[2] = _mm_setzero_si128(); src[3] = _mm_setzero_si128(); } else { - const __m128i src16[2] = { _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + i + 0)), - _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + i + 8)) }; + const __m128i src16[2] = { _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compState.xCustom + 0)), + _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + compState.xCustom + 8)) }; if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { @@ -2899,7 +2741,7 @@ const __m128i srcAlpha = _mm_setzero_si128(); __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)compState.lineLayerIDTarget); - __m128i passMask8 = _mm_xor_si128( _mm_cmpeq_epi8(_mm_load_si128((__m128i *)(this->_bgLayerIndexCustom + i)), _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF) ); + __m128i passMask8 = _mm_xor_si128( _mm_cmpeq_epi8(_mm_load_si128((__m128i *)(this->_bgLayerIndexCustom + compState.xCustom)), _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF) ); __m128i dst[4]; dst[0] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 0); @@ -2917,7 +2759,6 @@ } this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(compState, - i, src[3], src[2], src[1], src[0], srcAlpha, srcEffectEnableMask, @@ -2940,15 +2781,15 @@ #ifdef ENABLE_SSE2 #pragma LOOPVECTORIZE_DISABLE #endif - for (; i < lineWidth; i++, compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) + for (; compState.xCustom < compState.lineWidthCustom; compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) { - if (this->_bgLayerIndexCustom[i] == 0) + if 
(this->_bgLayerIndexCustom[compState.xCustom] == 0) { continue; } this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, - this->_bgLayerColorCustom[i], + this->_bgLayerColorCustom[compState.xCustom], 0); } } @@ -2971,7 +2812,7 @@ const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[compState.selectedLayerID]; const size_t ssePixCount = (compState.linePixelCount - (compState.linePixelCount % 16)); - for (; i < ssePixCount; i+=16, compState.lineColorTarget16+=16, compState.lineColorTarget32+=16, compState.lineLayerIDTarget+=16) + for (; i < ssePixCount; i+=16, compState.xCustom+=16, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16+=16, compState.lineColorTarget32+=16, compState.lineLayerIDTarget+=16) { const __m128i src16[2] = { _mm_load_si128((__m128i *)(srcLine + i + 0)), _mm_load_si128((__m128i *)(srcLine + i + 8)) }; @@ -3020,7 +2861,6 @@ } this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(compState, - i, src[3], src[2], src[1], src[0], srcAlpha, srcEffectEnableMask, @@ -3577,7 +3417,6 @@ { for (size_t i = 0; i < lg; i++, sprX++, x += xdir) { - //_gpuSprWin[sprX] = (src[x])?1:0; if (src[(x & 7) + ((x & 0xFFF8) << 3)]) { this->_sprWin[sprX] = 1; @@ -4075,7 +3914,7 @@ } } -template <NDSColorFormat OUTPUTFORMAT> +template <NDSColorFormat OUTPUTFORMAT, bool NOWINDOWSENABLEDHINT> void GPUEngineBase::_RenderLine_Layers(const size_t l) { const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); @@ -4122,12 +3961,6 @@ compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHeadNative; compState.lineLayerIDTarget = compState.lineLayerIDHead; - //cache some parameters which are assumed to be stable throughout the rendering of the entire line - if (this->_needUpdateWINH[0]) this->_UpdateWINH<0>(); - if (this->_needUpdateWINH[1]) this->_UpdateWINH<1>(); - this->_SetupWindows<0>(compState); - 
this->_SetupWindows<1>(compState); - this->_RenderLine_Clear<OUTPUTFORMAT>(compState); // for all the pixels in the line @@ -4136,6 +3969,11 @@ this->_RenderLine_SetupSprites(compState); } + if (!NOWINDOWSENABLEDHINT) + { + this->_PerformWindowTesting(compState); + } + // paint lower priorities first // then higher priorities on top for (size_t prio = NB_PRIORITIES; prio > 0; ) @@ -4148,27 +3986,28 @@ for (size_t i = 0; i < item->nbBGs; i++) { const GPULayerID layerID = (GPULayerID)item->BGs[i]; - compState.selectedLayerID = layerID; - compState.selectedBGLayer = &this->_BGLayer[layerID]; if (this->_enableLayer[layerID]) { + compState.selectedLayerID = layerID; + compState.selectedBGLayer = &this->_BGLayer[layerID]; + if (this->_engineID == GPUEngineID_Main) { if ( (layerID == GPULayerID_BG0) && GPU->GetEngineMain()->WillRender3DLayer() ) { - GPU->GetEngineMain()->RenderLine_Layer3D<OUTPUTFORMAT>(compState); + GPU->GetEngineMain()->RenderLine_Layer3D<OUTPUTFORMAT, NOWINDOWSENABLEDHINT>(compState); continue; } } if (this->isLineRenderNative[compState.lineIndexNative]) { - this->_RenderLine_LayerBG<OUTPUTFORMAT, false, false>(compState); + this->_RenderLine_LayerBG<OUTPUTFORMAT, false, NOWINDOWSENABLEDHINT, false>(compState); } else { - this->_RenderLine_LayerBG<OUTPUTFORMAT, false, true>(compState); + this->_RenderLine_LayerBG<OUTPUTFORMAT, false, NOWINDOWSENABLEDHINT, true>(compState); } } //layer enabled } @@ -4515,8 +4354,8 @@ } } -template<size_t WIN_NUM> -void GPUEngineBase::_SetupWindows(GPUEngineCompositorInfo &compState) +template <size_t WIN_NUM> +bool GPUEngineBase::_IsWindowInsideVerticalRange(GPUEngineCompositorInfo &compState) { const u16 windowTop = (WIN_NUM == 0) ? this->_IORegisterMap->WIN0V.Top : this->_IORegisterMap->WIN1V.Top; const u16 windowBottom = (WIN_NUM == 0) ? 
this->_IORegisterMap->WIN0V.Bottom : this->_IORegisterMap->WIN1V.Bottom; @@ -4526,24 +4365,21 @@ if (windowTop > windowBottom) { - if((compState.lineIndexNative < windowTop) && (compState.lineIndexNative > windowBottom)) goto allout; + if ((compState.lineIndexNative < windowTop) && (compState.lineIndexNative > windowBottom)) goto allout; } else { - if((compState.lineIndexNative < windowTop) || (compState.lineIndexNative >= windowBottom)) goto allout; + if ((compState.lineIndexNative < windowTop) || (compState.lineIndexNative >= windowBottom)) goto allout; } //the x windows will apply for this scanline - this->_curr_win[WIN_NUM] = this->_h_win[WIN_NUM]; - this->_isWindowInsideVerticalRange[WIN_NUM] = true; - return; + return true; allout: - this->_curr_win[WIN_NUM] = GPUEngineBase::_winEmpty; - this->_isWindowInsideVerticalRange[WIN_NUM] = false; + return false; } -template<size_t WIN_NUM> +template <size_t WIN_NUM> void GPUEngineBase::_UpdateWINH() { //dont even waste any time in here if the window isnt enabled @@ -4568,25 +4404,131 @@ { memset(this->_h_win[WIN_NUM], 1, GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u8)); memset(this->_h_win[WIN_NUM] + windowRight + 1, 0, (windowLeft - (windowRight + 1)) * sizeof(u8)); - -#ifdef ENABLE_SSE2 - this->_windowLeftCustom[WIN_NUM] = _gpuDstPitchIndex[windowLeft]; - this->_windowRightCustom[WIN_NUM] = _gpuDstPitchIndex[windowRight] + _gpuDstPitchCount[windowRight] - 1; - this->_windowLeftCustom_SSE2[WIN_NUM] = _mm_set1_epi16(this->_windowLeftCustom[WIN_NUM]); - this->_windowRightCustom_SSE2[WIN_NUM] = _mm_set1_epi16(this->_windowRightCustom[WIN_NUM]); -#endif } else { memset(this->_h_win[WIN_NUM], 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u8)); memset(this->_h_win[WIN_NUM] + windowLeft, 1, (windowRight - windowLeft) * sizeof(u8)); + } +} + +void GPUEngineBase::_PerformWindowTesting(GPUEngineCompositorInfo &compState) +{ + if (this->_needUpdateWINH[0]) this->_UpdateWINH<0>(); + if (this->_needUpdateWINH[1]) this->_UpdateWINH<1>(); + 
+ for (size_t layerID = GPULayerID_BG0; layerID <= GPULayerID_OBJ; layerID++) + { + if (!this->_enableLayer[layerID]) + { + continue; + } #ifdef ENABLE_SSE2 - this->_windowLeftCustom[WIN_NUM] = _gpuDstPitchIndex[windowLeft] + _gpuDstPitchCount[windowLeft] - 1; - this->_windowRightCustom[WIN_NUM] = _gpuDstPitchIndex[windowRight]; - this->_windowLeftCustom_SSE2[WIN_NUM] = _mm_set1_epi16(this->_windowLeftCustom[WIN_NUM]); - this->_windowRightCustom_SSE2[WIN_NUM] = _mm_set1_epi16(this->_windowRightCustom[WIN_NUM]); + for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i+=16) + { + __m128i win_vec128; + + __m128i didPassWindowTest = _mm_setzero_si128(); + __m128i enableColorEffect = _mm_setzero_si128(); + + __m128i win0HandledMask = _mm_setzero_si128(); + __m128i win1HandledMask = _mm_setzero_si128(); + __m128i winOBJHandledMask = _mm_setzero_si128(); + __m128i winOUTHandledMask = _mm_setzero_si128(); + + // Window 0 has the highest priority, so always check this first. + if (this->_WIN0_ENABLED && this->_IsWindowInsideVerticalRange<0>(compState)) + { + win_vec128 = _mm_load_si128((__m128i *)(this->_h_win[0] + i)); + win0HandledMask = _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1)); + + didPassWindowTest = _mm_and_si128(win0HandledMask, this->_WIN0_enable_SSE2[layerID]); + enableColorEffect = _mm_and_si128(win0HandledMask, this->_WIN0_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]); + } + + // Window 1 has medium priority, and is checked after Window 0. 
+ if (this->_WIN1_ENABLED && this->_IsWindowInsideVerticalRange<1>(compState)) + { + win_vec128 = _mm_load_si128((__m128i *)(this->_h_win[1] + i)); + win1HandledMask = _mm_andnot_si128(win0HandledMask, _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1))); + + didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(win1HandledMask, this->_WIN1_enable_SSE2[layerID]) ); + enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(win1HandledMask, this->_WIN1_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); + } + + // Window OBJ has low priority, and is checked after both Window 0 and Window 1. + if (this->_WINOBJ_ENABLED) + { + win_vec128 = _mm_load_si128((__m128i *)(this->_sprWin + i)); + winOBJHandledMask = _mm_andnot_si128( _mm_or_si128(win0HandledMask, win1HandledMask), _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1)) ); + + didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(winOBJHandledMask, this->_WINOBJ_enable_SSE2[layerID]) ); + enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(winOBJHandledMask, this->_WINOBJ_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); + } + + // If the pixel isn't inside any windows, then the pixel is outside, and therefore uses the WINOUT flags. + // This has the lowest priority, and is always checked last. 
+ winOUTHandledMask = _mm_xor_si128( _mm_or_si128(win0HandledMask, _mm_or_si128(win1HandledMask, winOBJHandledMask)), _mm_set1_epi32(0xFFFFFFFF) ); + didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(winOUTHandledMask, this->_WINOUT_enable_SSE2[layerID]) ); + enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(winOUTHandledMask, this->_WINOUT_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); + + _mm_store_si128((__m128i *)(this->_didPassWindowTestNative[layerID] + i), _mm_and_si128(didPassWindowTest, _mm_set1_epi8(0x01))); + _mm_store_si128((__m128i *)(this->_enableColorEffectNative[layerID] + i), _mm_and_si128(enableColorEffect, _mm_set1_epi8(0x01))); + } +#else + for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++) + { + // Window 0 has the highest priority, so always check this first. + if (this->_WIN0_ENABLED && this->_IsWindowInsideVerticalRange<0>(compState)) + { + if (this->_h_win[0][i] == 1) + { + this->_didPassWindowTestNative[layerID][i] = this->_WIN0_enable[layerID]; + this->_enableColorEffectNative[layerID][i] = this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG]; + continue; + } + } + + // Window 1 has medium priority, and is checked after Window 0. + if (this->_WIN1_ENABLED && this->_IsWindowInsideVerticalRange<1>(compState)) + { + if (this->_h_win[1][i] == 1) + { + this->_didPassWindowTestNative[layerID][i] = this->_WIN1_enable[layerID]; + this->_enableColorEffectNative[layerID][i] = this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG]; + continue; + } + } + + // Window OBJ has low priority, and is checked after both Window 0 and Window 1. + if (this->_WINOBJ_ENABLED) + { + if (this->_sprWin[i] == 1) + { + this->_didPassWindowTestNative[layerID][i] = this->_WINOBJ_enable[layerID]; + this->_enableColorEffectNative[layerID][i] = this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG]; + continue; + } + } + + // If the pixel isn't inside any windows, then the pixel is outside, and therefore uses the WINOUT flags. 
+ // This has the lowest priority, and is always checked last. + this->_didPassWindowTestNative[layerID][i] = this->_WINOUT_enable[layerID]; + this->_enableColorEffectNative[layerID][i] = this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG]; + } #endif + + if (GPU->GetDisplayInfo().isCustomSizeRequested) + { + ExpandLine8(this->_didPassWindowTestCustom[layerID], this->_didPassWindowTestNative[layerID], compState.lineWidthCustom); + ExpandLine8(this->_enableColorEffectCustom[layerID], this->_enableColorEffectNative[layerID], compState.lineWidthCustom); + } + else + { + memcpy(this->_didPassWindowTestCustom[layerID], this->_didPassWindowTestNative[layerID], GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u8)); + memcpy(this->_enableColorEffectCustom[layerID], this->_enableColorEffectNative[layerID], GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u8)); + } } } @@ -4662,7 +4604,7 @@ } template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> -void GPUEngineBase::_RenderLine_LayerBG_ApplyNoWindowsEnabledHint(GPUEngineCompositorInfo &compState) +void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compState) { if (compState.colorEffect == ColorEffect_Disable) { @@ -4674,20 +4616,7 @@ } } -template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> -void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compState) -{ - if (this->_isAnyWindowEnabled) - { - this->_RenderLine_LayerBG_ApplyNoWindowsEnabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, false, ISCUSTOMRENDERINGNEEDED>(compState); - } - else - { - this->_RenderLine_LayerBG_ApplyNoWindowsEnabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, true, ISCUSTOMRENDERINGNEEDED>(compState); - } -} - -template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool 
ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compState) { if (ISDEBUGRENDER) @@ -4699,12 +4628,12 @@ #ifndef DISABLE_MOSAIC if (compState.selectedBGLayer->isMosaic && this->_isBGMosaicSet) { - this->_RenderLine_LayerBG_ApplyMosaic<OUTPUTFORMAT, ISDEBUGRENDER, true, ISCUSTOMRENDERINGNEEDED>(compState); + this->_RenderLine_LayerBG_ApplyMosaic<OUTPUTFORMAT, ISDEBUGRENDER, true, NOWINDOWSENABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState); } else #endif { - this->_RenderLine_LayerBG_ApplyMosaic<OUTPUTFORMAT, ISDEBUGRENDER, false, ISCUSTOMRENDERINGNEEDED>(compState); + this->_RenderLine_LayerBG_ApplyMosaic<OUTPUTFORMAT, ISDEBUGRENDER, false, NOWINDOWSENABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compState); } } } @@ -4756,7 +4685,7 @@ compState.lineColorHeadNative = compState.lineColorHead; compState.lineColorHeadCustom = compState.lineColorHead; - this->_RenderLine_LayerBG<NDSColorFormat_BGR555_Rev, true, false>(compState); + this->_RenderLine_LayerBG<NDSColorFormat_BGR555_Rev, true, true, false>(compState); } } @@ -4792,7 +4721,7 @@ } } -template<size_t WINNUM> +template <size_t WINNUM> void GPUEngineBase::ParseReg_WINnH() { this->_needUpdateWINH[WINNUM] = true; @@ -4800,19 +4729,19 @@ void GPUEngineBase::ParseReg_WININ() { - this->_WIN0_enable[GPULayerID_BG0] = (this->_IORegisterMap->WIN0IN.BG0_Enable != 0); - this->_WIN0_enable[GPULayerID_BG1] = (this->_IORegisterMap->WIN0IN.BG1_Enable != 0); - this->_WIN0_enable[GPULayerID_BG2] = (this->_IORegisterMap->WIN0IN.BG2_Enable != 0); - this->_WIN0_enable[GPULayerID_BG3] = (this->_IORegisterMap->WIN0IN.BG3_Enable != 0); - this->_WIN0_enable[GPULayerID_OBJ] = (this->_IORegisterMap->WIN0IN.OBJ_Enable != 0); - this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG] = (this->_IORegisterMap->WIN0IN.Effect_Enable != 0); + this->_WIN0_enable[GPULayerID_BG0] = this->_IORegisterMap->WIN0IN.BG0_Enable; + this->_WIN0_enable[GPULayerID_BG1] = this->_IORegisterMap->WIN0IN.BG1_Enable; + 
this->_WIN0_enable[GPULayerID_BG2] = this->_IORegisterMap->WIN0IN.BG2_Enable; + this->_WIN0_enable[GPULayerID_BG3] = this->_IORegisterMap->WIN0IN.BG3_Enable; + this->_WIN0_enable[GPULayerID_OBJ] = this->_IORegisterMap->WIN0IN.OBJ_Enable; + this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG] = this->_IORegisterMap->WIN0IN.Effect_Enable; - this->_WIN1_enable[GPULayerID_BG0] = (this->_IORegisterMap->WIN1IN.BG0_Enable != 0); - this->_WIN1_enable[GPULayerID_BG1] = (this->_IORegisterMap->WIN1IN.BG1_Enable != 0); - this->_WIN1_enable[GPULayerID_BG2] = (this->_IORegisterMap->WIN1IN.BG2_Enable != 0); - this->_WIN1_enable[GPULayerID_BG3] = (this->_IORegisterMap->WIN1IN.BG3_Enable != 0); - this->_WIN1_enable[GPULayerID_OBJ] = (this->_IORegisterMap->WIN1IN.OBJ_Enable != 0); - this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG] = (this->_IORegisterMap->WIN1IN.Effect_Enable != 0); + this->_WIN1_enable[GPULayerID_BG0] = this->_IORegisterMap->WIN1IN.BG0_Enable; + this->_WIN1_enable[GPULayerID_BG1] = this->_IORegisterMap->WIN1IN.BG1_Enable; + this->_WIN1_enable[GPULayerID_BG2] = this->_IORegisterMap->WIN1IN.BG2_Enable; + this->_WIN1_enable[GPULayerID_BG3] = this->_IORegisterMap->WIN1IN.BG3_Enable; + this->_WIN1_enable[GPULayerID_OBJ] = this->_IORegisterMap->WIN1IN.OBJ_Enable; + this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG] = this->_IORegisterMap->WIN1IN.Effect_Enable; #if defined(ENABLE_SSE2) this->_WIN0_enable_SSE2[GPULayerID_BG0] = _mm_set1_epi8((this->_IORegisterMap->WIN0IN.BG0_Enable != 0) ? 
0xFF : 0x00); @@ -4833,19 +4762,19 @@ void GPUEngineBase::ParseReg_WINOUT() { - this->_WINOUT_enable[GPULayerID_BG0] = (this->_IORegisterMap->WINOUT.BG0_Enable != 0); - this->_WINOUT_enable[GPULayerID_BG1] = (this->_IORegisterMap->WINOUT.BG1_Enable != 0); - this->_WINOUT_enable[GPULayerID_BG2] = (this->_IORegisterMap->WINOUT.BG2_Enable != 0); - this->_WINOUT_enable[GPULayerID_BG3] = (this->_IORegisterMap->WINOUT.BG3_Enable != 0); - this->_WINOUT_enable[GPULayerID_OBJ] = (this->_IORegisterMap->WINOUT.OBJ_Enable != 0); - this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG] = (this->_IORegisterMap->WINOUT.Effect_Enable != 0); + this->_WINOUT_enable[GPULayerID_BG0] = this->_IORegisterMap->WINOUT.BG0_Enable; + this->_WINOUT_enable[GPULayerID_BG1] = this->_IORegisterMap->WINOUT.BG1_Enable; + this->_WINOUT_enable[GPULayerID_BG2] = this->_IORegisterMap->WINOUT.BG2_Enable; + this->_WINOUT_enable[GPULayerID_BG3] = this->_IORegisterMap->WINOUT.BG3_Enable; + this->_WINOUT_enable[GPULayerID_OBJ] = this->_IORegisterMap->WINOUT.OBJ_Enable; + this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG] = this->_IORegisterMap->WINOUT.Effect_Enable; - this->_WINOBJ_enable[GPULayerID_BG0] = (this->_IORegisterMap->WINOBJ.BG0_Enable != 0); - this->_WINOBJ_enable[GPULayerID_BG1] = (this->_IORegisterMap->WINOBJ.BG1_Enable != 0); - this->_WINOBJ_enable[GPULayerID_BG2] = (this->_IORegisterMap->WINOBJ.BG2_Enable != 0); - this->_WINOBJ_enable[GPULayerID_BG3] = (this->_IORegisterMap->WINOBJ.BG3_Enable != 0); - this->_WINOBJ_enable[GPULayerID_OBJ] = (this->_IORegisterMap->WINOBJ.OBJ_Enable != 0); - this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG] = (this->_IORegisterMap->WINOBJ.Effect_Enable != 0); + this->_WINOBJ_enable[GPULayerID_BG0] = this->_IORegisterMap->WINOBJ.BG0_Enable; + this->_WINOBJ_enable[GPULayerID_BG1] = this->_IORegisterMap->WINOBJ.BG1_Enable; + this->_WINOBJ_enable[GPULayerID_BG2] = this->_IORegisterMap->WINOBJ.BG2_Enable; + this->_WINOBJ_enable[GPULayerID_BG3] = 
this->_IORegisterMap->WINOBJ.BG3_Enable; + this->_WINOBJ_enable[GPULayerID_OBJ] = this->_IORegisterMap->WINOBJ.OBJ_Enable; + this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG] = this->_IORegisterMap->WINOBJ.Effect_Enable; #if defined(ENABLE_SSE2) this->_WINOUT_enable_SSE2[GPULayerID_BG0] = _mm_set1_epi8((this->_IORegisterMap->WINOUT.BG0_Enable != 0) ? 0xFF : 0x00); @@ -4970,32 +4899,50 @@ void GPUEngineBase::SetCustomFramebufferSize(size_t w, size_t h) { - void *oldWorkingScanline = this->_internalRenderLineTargetCustom; - u8 *oldBGPixels = this->_renderLineLayerIDCustom; + void *oldWorkingLineColor = this->_internalRenderLineTargetCustom; + u8 *oldWorkingLineLayerID = this->_renderLineLayerIDCustom; u8 *oldBGLayerIndexCustom = this->_bgLayerIndexCustom; u16 *oldBGLayerColorCustom = this->_bgLayerColorCustom; + u8 *oldDidPassWindowTestCustomMasterPtr = this->_didPassWindowTestCustomMasterPtr; - void *newWorkingScanline = malloc_alignedCacheLine(w * _gpuLargestDstLineCount * GPU->GetDisplayInfo().pixelBytes); - u8 *newBGPixels = (u8 *)malloc_alignedCacheLine(w * _gpuLargestDstLineCount * 4 * sizeof(u8)); // yes indeed, this is oversized. map debug tools try to write to it - u8 *newBGLayerIndexCustom = (u8 *)malloc_alignedCacheLine(w * _gpuLargestDstLineCount * sizeof(u8)); - u16 *newBGLayerColorCustom = (u16 *)malloc_alignedCacheLine(w * _gpuLargestDstLineCount * sizeof(u16)); + void *newWorkingLineColor = malloc_alignedCacheLine(w * _gpuLargestDstLineCount * GPU->GetDisplayInfo().pixelBytes); + u8 *newWorkingLineLayerID = (u8 *)malloc_alignedCacheLine(w * _gpuLargestDstLineCount * 4 * sizeof(u8)); // yes indeed, this is oversized. 
map debug tools try to write to it + u8 *newBGLayerIndexCustom = (u8 *)malloc_alignedCacheLine(w * sizeof(u8)); + u16 *newBGLayerColorCustom = (u16 *)malloc_alignedCacheLine(w * sizeof(u16)); + u8 *newDidPassWindowTestCustomMasterPtr = (u8 *)malloc_alignedCacheLine(w * 10 * sizeof(u8)); - this->_internalRenderLineTargetCustom = newWorkingScanline; - this->_renderLineLayerIDCustom = newBGPixels; + this->_internalRenderLineTargetCustom = newWorkingLineColor; + this->_renderLineLayerIDCustom = newWorkingLineLayerID; this->nativeBuffer = GPU->GetDisplayInfo().nativeBuffer[this->_targetDisplayID]; this->customBuffer = GPU->GetDisplayInfo().customBuffer[this->_targetDisplayID]; this->_bgLayerIndexCustom = newBGLayerIndexCustom; this->_bgLayerColorCustom = newBGLayerColorCustom; + this->_didPassWindowTestCustomMasterPtr = newDidPassWindowTestCustomMasterPtr; + this->_didPassWindowTestCustom[GPULayerID_BG0] = this->_didPassWindowTestCustomMasterPtr + (0 * w * sizeof(u8)); + this->_didPassWindowTestCustom[GPULayerID_BG1] = this->_didPassWindowTestCustomMasterPtr + (1 * w * sizeof(u8)); + this->_didPassWindowTestCustom[GPULayerID_BG2] = this->_didPassWindowTestCustomMasterPtr + (2 * w * sizeof(u8)); + this->_didPassWindowTestCustom[GPULayerID_BG3] = this->_didPassWindowTestCustomMasterPtr + (3 * w * sizeof(u8)); + this->_didPassWindowTestCustom[GPULayerID_OBJ] = this->_didPassWindowTestCustomMasterPtr + (4 * w * sizeof(u8)); + + this->_enableColorEffectCustomMasterPtr = newDidPassWindowTestCustomMasterPtr + (w * 5 * sizeof(u8)); + this->_enableColorEffectCustom[GPULayerID_BG0] = this->_enableColorEffectCustomMasterPtr + (0 * w * sizeof(u8)); + this->_enableColorEffectCustom[GPULayerID_BG1] = this->_enableColorEffectCustomMasterPtr + (1 * w * sizeof(u8)); + this->_enableColorEffectCustom[GPULayerID_BG2] = this->_enableColorEffectCustomMasterPtr + (2 * w * sizeof(u8)); + this->_enableColorEffectCustom[GPULayerID_BG3] = this->_enableColorEffectCustomMasterPtr + (3 * w * 
sizeof(u8)); + this->_enableColorEffectCustom[GPULayerID_OBJ] = this->_enableColorEffectCustomMasterPtr + (4 * w * sizeof(u8)); + this->_needUpdateWINH[0] = true; this->_needUpdateWINH[1] = true; - free_aligned(oldWorkingScanline); - free_aligned(oldBGPixels); + free_aligned(oldWorkingLineColor); + free_aligned(oldWorkingLineLayerID); free_aligned(oldBGLayerIndexCustom); free_aligned(oldBGLayerColorCustom); + free_aligned(oldDidPassWindowTestCustomMasterPtr); } +template <NDSColorFormat OUTPUTFORMAT> void GPUEngineBase::ResolveCustomRendering() { const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); @@ -5013,7 +4960,7 @@ } // Resolve any remaining native lines to the custom buffer - if (dispInfo.pixelBytes == 2) + if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { for (size_t y = 0; y < GPU_FRAMEBUFFER_NATIVE_HEIGHT; y++) { @@ -5024,7 +4971,7 @@ } } } - else if (dispInfo.pixelBytes == 4) + else { for (size_t y = 0; y < GPU_FRAMEBUFFER_NATIVE_HEIGHT; y++) { @@ -5334,12 +5281,18 @@ return ( this->_enableLayer[GPULayerID_BG0] && (this->_IORegisterMap->DISPCNT.BG0_3D != 0) ); } -bool GPUEngineA::WillCapture3DLayerDirect() +bool GPUEngineA::WillCapture3DLayerDirect(const size_t l) { const IOREG_DISPCAPCNT &DISPCAPCNT = this->_IORegisterMap->DISPCAPCNT; - return ( (DISPCAPCNT.CaptureEnable != 0) && (DISPCAPCNT.SrcA != 0) && (DISPCAPCNT.CaptureSrc != 1) && (vramConfiguration.banks[DISPCAPCNT.VRAMWriteBlock].purpose == VramConfiguration::LCDC) ); + return ( this->WillDisplayCapture(l) && (DISPCAPCNT.SrcA != 0) && (DISPCAPCNT.CaptureSrc != 1) ); } +bool GPUEngineA::WillDisplayCapture(const size_t l) +{ + const IOREG_DISPCAPCNT &DISPCAPCNT = this->_IORegisterMap->DISPCAPCNT; + return (DISPCAPCNT.CaptureEnable != 0) && (vramConfiguration.banks[DISPCAPCNT.VRAMWriteBlock].purpose == VramConfiguration::LCDC) && (l < this->_dispCapCnt.capy); +} + bool GPUEngineA::VerifyVRAMLineDidChange(const size_t blockID, const size_t l) { // This method must be called for ALL instances where 
captured lines in VRAM may be read back. @@ -5371,9 +5324,20 @@ void GPUEngineA::RenderLine(const u16 l) { const IOREG_DISPCAPCNT &DISPCAPCNT = this->_IORegisterMap->DISPCAPCNT; + const bool isDisplayCaptureNeeded = this->WillDisplayCapture(l); // Render the line - this->_RenderLine_Layers<OUTPUTFORMAT>(l); + if ( (this->_displayOutputMode == GPUDisplayMode_Normal) || isDisplayCaptureNeeded ) + { + if (this->_isAnyWindowEnabled) + { + this->_RenderLine_Layers<OUTPUTFORMAT, false>(l); + } + else + { + this->_RenderLine_Layers<OUTPUTFORMAT, true>(l); + } + } // Fill the display output switch (this->_displayOutputMode) @@ -5400,7 +5364,7 @@ //BUG!!! if someone is capturing and displaying both from the fifo, then it will have been //consumed above by the display before we get here //(is that even legal? i think so) - if ((DISPCAPCNT.CaptureEnable != 0) && (vramConfiguration.banks[DISPCAPCNT.VRAMWriteBlock].purpose == VramConfiguration::LCDC) && (l < this->_dispCapCnt.capy)) + if (isDisplayCaptureNeeded) { if (DISPCAPCNT.CaptureSize == DisplayCaptureSize_128x128) { @@ -5413,7 +5377,7 @@ } } -template <NDSColorFormat OUTPUTFORMAT> +template <NDSColorFormat OUTPUTFORMAT, bool NOWINDOWSENABLEDHINT> void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compState) { const FragmentColor *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer(); @@ -5447,25 +5411,24 @@ const float customWidthScale = (float)compState.lineWidthCustom / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH; const FragmentColor *__restrict srcLinePtr = framebuffer3D + compState.blockOffsetCustom; - compState.xNative = 0; - compState.xCustom = 0; compState.lineColorTarget16 = (u16 *)compState.lineColorHead; compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHead; compState.lineLayerIDTarget = compState.lineLayerIDHead; // Horizontally offset the 3D layer by this amount. // Test case: Blowing up large objects in Nanostray 2 will cause the main screen to shake horizontally. 
- const u16 hofs = (u16)( ((float)this->_BGLayer[GPULayerID_BG0].xOffset * customWidthScale) + 0.5f ); + const u16 hofs = (u16)( ((float)compState.selectedBGLayer->xOffset * customWidthScale) + 0.5f ); if (hofs == 0) { for (size_t line = 0; line < compState.lineRenderCount; line++) { - size_t dstX = 0; + compState.xNative = 0; + compState.xCustom = 0; #ifdef ENABLE_SSE2 const size_t ssePixCount = compState.lineWidthCustom - (compState.lineWidthCustom % 16); - for (; dstX < ssePixCount; dstX+=16, srcLinePtr+=16, compState.lineColorTarget16+=16, compState.lineColorTarget32+=16, compState.lineLayerIDTarget+=16) + for (; compState.xCustom < ssePixCount; srcLinePtr+=16, compState.xCustom+=16, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16+=16, compState.lineColorTarget32+=16, compState.lineLayerIDTarget+=16) { const __m128i src[4] = { _mm_load_si128((__m128i *)srcLinePtr + 0), _mm_load_si128((__m128i *)srcLinePtr + 1), @@ -5475,12 +5438,21 @@ // Determine which pixels pass by doing the alpha test and the window test. const __m128i srcAlpha = _mm_packs_epi16( _mm_packs_epi32(_mm_srli_epi32(src[0], 24), _mm_srli_epi32(src[1], 24)), _mm_packs_epi32(_mm_srli_epi32(src[2], 24), _mm_srli_epi32(src[3], 24)) ); - - // Do the window test. __m128i passMask8; __m128i enableColorEffectMask; - this->_RenderPixel_CheckWindows16_SSE2<true>(compState, dstX, passMask8, enableColorEffectMask); + if (NOWINDOWSENABLEDHINT) + { + passMask8 = _mm_set1_epi8(0xFF); + enableColorEffectMask = _mm_set1_epi8(0xFF); + } + else + { + // Do the window test. + passMask8 = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_didPassWindowTestCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); + enableColorEffectMask = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_enableColorEffectCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); + } + // Do the alpha test. Pixels with an alpha value of 0 are rejected. 
passMask8 = _mm_andnot_si128(_mm_cmpeq_epi8(srcAlpha, _mm_setzero_si128()), passMask8); @@ -5535,23 +5507,15 @@ #ifdef ENABLE_SSE2 #pragma LOOPVECTORIZE_DISABLE #endif - - for (; dstX < compState.lineWidthCustom; dstX++, srcLinePtr++, compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) + for (; compState.xCustom < compState.lineWidthCustom; srcLinePtr++, compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) { - if (srcLinePtr->a == 0) + if ( (srcLinePtr->a == 0) || (!NOWINDOWSENABLEDHINT && (this->_didPassWindowTestCustom[compState.selectedLayerID][compState.xCustom] != 0)) ) { continue; } - bool didPassWindowTest; - bool enableColorEffect; - this->_RenderPixel_CheckWindows(compState, didPassWindowTest, enableColorEffect); + const bool enableColorEffect = (NOWINDOWSENABLEDHINT) ? 
true : (this->_enableColorEffectCustom[compState.selectedLayerID][compState.xCustom] != 0); - if (!didPassWindowTest) - { - continue; - } - this->_RenderPixel3D<OUTPUTFORMAT>(compState, enableColorEffect, *srcLinePtr); @@ -5562,30 +5526,27 @@ { for (size_t line = 0; line < compState.lineRenderCount; line++) { - for (size_t dstX = 0; dstX < compState.lineWidthCustom; dstX++, compState.xCustom++, compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) + for (compState.xNative = 0, compState.xCustom = 0; compState.xCustom < compState.lineWidthCustom; compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) { - size_t srcX = dstX + hofs; + if ( !NOWINDOWSENABLEDHINT && (this->_didPassWindowTestCustom[compState.selectedLayerID][compState.xCustom] == 0) ) + { + continue; + } + + size_t srcX = compState.xCustom + hofs; if (srcX >= compState.lineWidthCustom * 2) { srcX -= compState.lineWidthCustom * 2; } - compState.xNative = _gpuDstToSrcIndex[compState.xCustom]; - - if (srcX >= compState.lineWidthCustom || srcLinePtr[srcX].a == 0) + if ( (srcX >= compState.lineWidthCustom) || (srcLinePtr[srcX].a == 0) ) { continue; } - bool didPassWindowTest; - bool enableColorEffect; - this->_RenderPixel_CheckWindows(compState, didPassWindowTest, enableColorEffect); + compState.xNative = _gpuDstToSrcIndex[compState.xCustom]; + const bool enableColorEffect = (NOWINDOWSENABLEDHINT) ? 
true : (this->_enableColorEffectCustom[compState.selectedLayerID][compState.xCustom] != 0); - if (!didPassWindowTest) - { - continue; - } - this->_RenderPixel3D<OUTPUTFORMAT>(compState, enableColorEffect, srcLinePtr[srcX]); @@ -6781,10 +6742,20 @@ break; case GPUDisplayMode_Normal: // Display BG and OBJ layers - this->_RenderLine_Layers<OUTPUTFORMAT>(l); + { + if (this->_isAnyWindowEnabled) + { + this->_RenderLine_Layers<OUTPUTFORMAT, false>(l); + } + else + { + this->_RenderLine_Layers<OUTPUTFORMAT, true>(l); + } + this->_HandleDisplayModeNormal<OUTPUTFORMAT>(l); break; - + } + default: break; } @@ -6875,10 +6846,12 @@ free_aligned(_gpuDstToSrcIndex); _gpuDstToSrcIndex = NULL; - free_aligned(_gpuDstToSrcSSSE3_u8); - _gpuDstToSrcSSSE3_u8 = NULL; - free_aligned(_gpuDstToSrcSSSE3_u16); - _gpuDstToSrcSSSE3_u16 = NULL; + free_aligned(_gpuDstToSrcSSSE3_u8_8e); + _gpuDstToSrcSSSE3_u8_8e = NULL; + free_aligned(_gpuDstToSrcSSSE3_u8_16e); + _gpuDstToSrcSSSE3_u8_16e = NULL; + free_aligned(_gpuDstToSrcSSSE3_u16_8e); + _gpuDstToSrcSSSE3_u16_8e = NULL; delete _displayMain; delete _displayTouch; @@ -7077,8 +7050,9 @@ const float newGpuLargestDstLineCount = (size_t)ceilf(customHeightScale); u16 *oldGpuDstToSrcIndexPtr = _gpuDstToSrcIndex; - u8 *oldGpuDstToSrcSSSE3_u8 = _gpuDstToSrcSSSE3_u8; - u8 *oldGpuDstToSrcSSSE3_u16 = _gpuDstToSrcSSSE3_u16; + u8 *oldGpuDstToSrcSSSE3_u8_8e = _gpuDstToSrcSSSE3_u8_8e; + u8 *oldGpuDstToSrcSSSE3_u8_16e = _gpuDstToSrcSSSE3_u8_16e; + u8 *oldGpuDstToSrcSSSE3_u16_8e = _gpuDstToSrcSSSE3_u16_8e; for (size_t srcX = 0, currentPitchCount = 0; srcX < GPU_FRAMEBUFFER_NATIVE_WIDTH; srcX++) { @@ -7130,24 +7104,29 @@ dstIdx += (w * (_gpuDstLineCount[y] - 1)); } - u8 *newGpuDstToSrcSSSE3_u8 = (u8 *)malloc_alignedCacheLine(w * sizeof(u8)); - u8 *newGpuDstToSrcSSSE3_u16 = (u8 *)malloc_alignedCacheLine(w * 2 * sizeof(u8)); + u8 *newGpuDstToSrcSSSE3_u8_8e = (u8 *)malloc_alignedCacheLine(w * sizeof(u8)); + u8 *newGpuDstToSrcSSSE3_u8_16e = (u8 
*)malloc_alignedCacheLine(w * sizeof(u8)); + u8 *newGpuDstToSrcSSSE3_u16_8e = (u8 *)malloc_alignedCacheLine(w * sizeof(u16)); for (size_t i = 0; i < w; i++) { - const u8 value_u8 = newGpuDstToSrcIndex[i] & 0x0007; - const u8 value_u16 = (value_u8 << 1); + const u8 value_u8_8 = newGpuDstToSrcIndex[i] & 0x07; + const u8 value_u8_16 = newGpuDstToSrcIndex[i] & 0x0F; + const u8 value_u16 = (value_u8_8 << 1); - newGpuDstToSrcSSSE3_u8[i] = value_u8; - newGpuDstToSrcSSSE3_u16[(i << 1)] = value_u16; - newGpuDstToSrcSSSE3_u16[(i << 1) + 1] = value_u16 + 1; + newGpuDstToSrcSSSE3_u8_8e[i] = value_u8_8; + newGpuDstToSrcSSSE3... [truncated message content] |
From: <ze...@us...> - 2016-07-26 17:17:39
|
Revision: 5514 http://sourceforge.net/p/desmume/code/5514 Author: zeromus Date: 2016-07-26 17:17:36 +0000 (Tue, 26 Jul 2016) Log Message: ----------- apply suggestions from #1570 Modified Paths: -------------- trunk/desmume/src/MMU_timing.h trunk/desmume/src/ctrlssdl.cpp Modified: trunk/desmume/src/MMU_timing.h =================================================================== --- trunk/desmume/src/MMU_timing.h 2016-07-26 17:15:27 UTC (rev 5513) +++ trunk/desmume/src/MMU_timing.h 2016-07-26 17:17:36 UTC (rev 5514) @@ -155,8 +155,8 @@ enum { ASSOCIATIVITY = 1 << ASSOCIATIVESHIFT }; enum { BLOCKSIZE = 1 << BLOCKSIZESHIFT }; enum { TAGSHIFT = SIZESHIFT - ASSOCIATIVESHIFT }; - enum { TAGMASK = (u32)(~0 << TAGSHIFT) }; - enum { BLOCKMASK = ((u32)~0 >> (32 - TAGSHIFT)) & (u32)(~0 << BLOCKSIZESHIFT) }; + enum { TAGMASK = (u32)(~0U << TAGSHIFT) }; + enum { BLOCKMASK = ((u32)~0U >> (32 - TAGSHIFT)) & (u32)(~0U << BLOCKSIZESHIFT) }; enum { WORDSIZE = sizeof(u32) }; enum { WORDSPERBLOCK = (1 << BLOCKSIZESHIFT) / WORDSIZE }; enum { DATAPERWORD = WORDSIZE * ASSOCIATIVITY }; Modified: trunk/desmume/src/ctrlssdl.cpp =================================================================== --- trunk/desmume/src/ctrlssdl.cpp 2016-07-26 17:15:27 UTC (rev 5513) +++ trunk/desmume/src/ctrlssdl.cpp 2016-07-26 17:17:36 UTC (rev 5514) @@ -200,7 +200,7 @@ break; case SDL_JOYAXISMOTION: /* Dead zone of 50% */ - if( (abs(event.jaxis.value) >> 14) != 0 ) + if( ((u32)abs(event.jaxis.value) >> 14) != 0 ) { key = ((event.jaxis.which & 15) << 12) | JOY_AXIS << 8 | ((event.jaxis.axis & 127) << 1); if (event.jaxis.value > 0) { @@ -370,7 +370,7 @@ Note: button constants have a 1bit offset. 
*/ case SDL_JOYAXISMOTION: key_code = ((event->jaxis.which & 15) << 12) | JOY_AXIS << 8 | ((event->jaxis.axis & 127) << 1); - if( (abs(event->jaxis.value) >> 14) != 0 ) + if( (u32)(abs(event->jaxis.value) >> 14) != 0 ) { if (event->jaxis.value > 0) key_code |= 1; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-07-26 17:15:29
|
Revision: 5513 http://sourceforge.net/p/desmume/code/5513 Author: zeromus Date: 2016-07-26 17:15:27 +0000 (Tue, 26 Jul 2016) Log Message: ----------- fix infinite loop bugs in commandline parsing Modified Paths: -------------- trunk/desmume/src/commandline.cpp Modified: trunk/desmume/src/commandline.cpp =================================================================== --- trunk/desmume/src/commandline.cpp 2016-07-22 19:30:24 UTC (rev 5512) +++ trunk/desmume/src/commandline.cpp 2016-07-26 17:15:27 UTC (rev 5513) @@ -254,7 +254,8 @@ }; int c = getopt_long(argc,argv,"",long_options,&option_index); - if(c == -1) + if(c == -1) break; + if(c == '?') break; switch(c) @@ -361,8 +362,7 @@ int remain = argc-optind; if(remain==1) nds_file = argv[optind]; - else if(remain>1) - return false; + else if(remain>1) return false; return true; } @@ -443,9 +443,7 @@ void CommandLine::errorHelp(const char* binName) { - //TODO - strip this down to just the filename - printerror("USAGE: %s [options] [nds-file]\n", binName); - printerror("USAGE: %s --help - for help\n", binName); + printerror(help_string); } void CommandLine::process_movieCommands() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-07-22 19:30:27
|
Revision: 5512 http://sourceforge.net/p/desmume/code/5512 Author: rogerman Date: 2016-07-22 19:30:24 +0000 (Fri, 22 Jul 2016) Log Message: ----------- GPU: - Use a GPUEngineCompositorInfo struct for managing compositor states, instead of using individual variables for everything. Modified Paths: -------------- trunk/desmume/src/GPU.cpp trunk/desmume/src/GPU.h Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-22 18:32:31 UTC (rev 5511) +++ trunk/desmume/src/GPU.cpp 2016-07-22 19:30:24 UTC (rev 5512) @@ -110,8 +110,12 @@ }; //instantiate static instance -u16 GPUEngineBase::_fadeInColors[17][0x8000]; -u16 GPUEngineBase::_fadeOutColors[17][0x8000]; +u16 GPUEngineBase::_brightnessUpTable555[17][0x8000]; +FragmentColor GPUEngineBase::_brightnessUpTable666[17][0x8000]; +FragmentColor GPUEngineBase::_brightnessUpTable888[17][0x8000]; +u16 GPUEngineBase::_brightnessDownTable555[17][0x8000]; +FragmentColor GPUEngineBase::_brightnessDownTable666[17][0x8000]; +FragmentColor GPUEngineBase::_brightnessDownTable888[17][0x8000]; u8 GPUEngineBase::_blendTable555[17][17][32][32]; GPUEngineBase::MosaicLookup GPUEngineBase::_mosaicLookup; @@ -318,14 +322,18 @@ cur.bits.green = (cur.bits.green + ((31 - cur.bits.green) * i / 16)); cur.bits.blue = (cur.bits.blue + ((31 - cur.bits.blue) * i / 16)); cur.bits.alpha = 0; - GPUEngineBase::_fadeInColors[i][j] = cur.val; + GPUEngineBase::_brightnessUpTable555[i][j] = cur.val; + GPUEngineBase::_brightnessUpTable666[i][j].color = COLOR555TO666(cur.val); + GPUEngineBase::_brightnessUpTable888[i][j].color = COLOR555TO888(cur.val); cur.val = j; cur.bits.red = (cur.bits.red - (cur.bits.red * i / 16)); cur.bits.green = (cur.bits.green - (cur.bits.green * i / 16)); cur.bits.blue = (cur.bits.blue - (cur.bits.blue * i / 16)); cur.bits.alpha = 0; - GPUEngineBase::_fadeOutColors[i][j] = cur.val; + GPUEngineBase::_brightnessDownTable555[i][j] = cur.val; + 
GPUEngineBase::_brightnessDownTable666[i][j].color = COLOR555TO666(cur.val); + GPUEngineBase::_brightnessDownTable888[i][j].color = COLOR555TO888(cur.val); } } @@ -510,9 +518,13 @@ this->_BLDALPHA_EVA = 0; this->_BLDALPHA_EVB = 0; this->_BLDALPHA_EVY = 0; - this->_blendTable = (TBlendTable *)&GPUEngineBase::_blendTable555[this->_BLDALPHA_EVA][this->_BLDALPHA_EVB][0][0]; - this->_currentFadeInColors = &GPUEngineBase::_fadeInColors[this->_BLDALPHA_EVY][0]; - this->_currentFadeOutColors = &GPUEngineBase::_fadeOutColors[this->_BLDALPHA_EVY][0]; + this->_selectedBlendTable555 = (TBlendTable *)&GPUEngineBase::_blendTable555[this->_BLDALPHA_EVA][this->_BLDALPHA_EVB][0][0]; + this->_selectedBrightnessUpTable555 = &GPUEngineBase::_brightnessUpTable555[this->_BLDALPHA_EVY][0]; + this->_selectedBrightnessUpTable666 = &GPUEngineBase::_brightnessUpTable666[this->_BLDALPHA_EVY][0]; + this->_selectedBrightnessUpTable888 = &GPUEngineBase::_brightnessUpTable888[this->_BLDALPHA_EVY][0]; + this->_selectedBrightnessDownTable555 = &GPUEngineBase::_brightnessDownTable555[this->_BLDALPHA_EVY][0]; + this->_selectedBrightnessDownTable666 = &GPUEngineBase::_brightnessDownTable666[this->_BLDALPHA_EVY][0]; + this->_selectedBrightnessDownTable888 = &GPUEngineBase::_brightnessDownTable888[this->_BLDALPHA_EVY][0]; this->_srcBlendEnable[GPULayerID_BG0] = false; this->_srcBlendEnable[GPULayerID_BG1] = false; @@ -617,6 +629,44 @@ this->renderedWidth = GPU_FRAMEBUFFER_NATIVE_WIDTH; this->renderedHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT; this->renderedBuffer = this->nativeBuffer; + + GPUEngineCompositorInfo &compState = this->_currentCompositorState; + compState.lineIndexNative = 0; + compState.lineIndexCustom = 0; + compState.lineWidthCustom = GPU_FRAMEBUFFER_NATIVE_WIDTH; + compState.lineRenderCount = 1; + compState.linePixelCount = compState.lineWidthCustom * compState.lineRenderCount; + compState.blockOffsetNative = compState.lineIndexNative * GPU_FRAMEBUFFER_NATIVE_WIDTH; + 
compState.blockOffsetCustom = compState.lineIndexCustom * compState.lineWidthCustom; + + compState.selectedLayerID = GPULayerID_BG0; + compState.selectedBGLayer = &this->_BGLayer[GPULayerID_BG0]; + compState.backdropColor16 = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF; + compState.colorEffect = (ColorEffect)this->_IORegisterMap->BLDCNT.ColorEffect; + compState.blendEVA = this->_BLDALPHA_EVA; + compState.blendEVB = this->_BLDALPHA_EVB; + compState.blendEVY = this->_BLDALPHA_EVY; + compState.blendTable555 = this->_selectedBlendTable555; + compState.brightnessUpTable555 = this->_selectedBrightnessUpTable555; + compState.brightnessUpTable666 = this->_selectedBrightnessUpTable666; + compState.brightnessUpTable888 = this->_selectedBrightnessUpTable888; + compState.brightnessDownTable555 = this->_selectedBrightnessDownTable555; + compState.brightnessDownTable666 = this->_selectedBrightnessDownTable666; + compState.brightnessDownTable888 = this->_selectedBrightnessDownTable888; + + compState.lineColorHeadNative = this->_internalRenderLineTargetNative; + compState.lineColorHeadCustom = this->_internalRenderLineTargetCustom; + compState.lineColorHead = compState.lineColorHeadNative; + compState.lineLayerIDHeadNative = this->_renderLineLayerIDNative; + compState.lineLayerIDHeadCustom = this->_renderLineLayerIDCustom; + compState.lineLayerIDHead = compState.lineLayerIDHeadNative; + + compState.xNative = 0; + compState.xCustom = 0; + compState.lineColorTarget = (void **)&compState.lineColorTarget16; + compState.lineColorTarget16 = (u16 *)compState.lineColorHeadNative; + compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHeadNative; + compState.lineLayerIDTarget = compState.lineLayerIDHead; } void GPUEngineBase::Reset() @@ -773,11 +823,6 @@ return blendedColor; } -FORCEINLINE u16 GPUEngineBase::_ColorEffectIncreaseBrightness(const u16 col) -{ - return this->_currentFadeInColors[col]; -} - FORCEINLINE u16 GPUEngineBase::_ColorEffectIncreaseBrightness(const u16 
col, const u16 blendEVY) { u16 r = col & 0x001F; @@ -817,11 +862,6 @@ return newColor; } -FORCEINLINE u16 GPUEngineBase::_ColorEffectDecreaseBrightness(const u16 col) -{ - return this->_currentFadeOutColors[col]; -} - FORCEINLINE u16 GPUEngineBase::_ColorEffectDecreaseBrightness(const u16 col, const u16 blendEVY) { u16 r = col & 0x001F; @@ -1304,37 +1344,35 @@ } template <NDSColorFormat OUTPUTFORMAT> -void GPUEngineBase::_RenderLine_Clear(const u16 clearColor, const u16 l, void *renderLineTarget) +void GPUEngineBase::_RenderLine_Clear(GPUEngineCompositorInfo &compState) { // Clear the current line with the clear color - const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; + u16 dstClearColor16 = compState.backdropColor16; - u16 dstClearColor16 = clearColor; - if (this->_srcBlendEnable[GPULayerID_Backdrop]) { - if (BLDCNT.ColorEffect == ColorEffect_IncreaseBrightness) + if (compState.colorEffect == ColorEffect_IncreaseBrightness) { - dstClearColor16 = this->_currentFadeInColors[clearColor]; + dstClearColor16 = compState.brightnessUpTable555[compState.backdropColor16]; } - else if (BLDCNT.ColorEffect == ColorEffect_DecreaseBrightness) + else if (compState.colorEffect == ColorEffect_DecreaseBrightness) { - dstClearColor16 = this->_currentFadeOutColors[clearColor]; + dstClearColor16 = compState.brightnessDownTable555[compState.backdropColor16]; } } switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(renderLineTarget, dstClearColor16); + memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(compState.lineColorTarget16, dstClearColor16); break; case NDSColorFormat_BGR666_Rev: - memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(renderLineTarget, COLOR555TO666(dstClearColor16)); + memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(compState.lineColorTarget32, COLOR555TO666(dstClearColor16)); break; case NDSColorFormat_BGR888_Rev: - memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(renderLineTarget, COLOR555TO888(dstClearColor16)); + 
memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(compState.lineColorTarget32, COLOR555TO888(dstClearColor16)); break; } @@ -1352,6 +1390,9 @@ template <NDSColorFormat OUTPUTFORMAT> void GPUEngineBase::RenderLine(const u16 l) { + this->_currentCompositorState.lineIndexNative = l; + this->_currentCompositorState.lineIndexCustom = _gpuDstLineIndex[l]; + // By default, do nothing. this->UpdatePropertiesWithoutRender(l); } @@ -1594,7 +1635,7 @@ // ROUTINES FOR INSIDE / OUTSIDE WINDOW CHECKS /*****************************************************************************/ -FORCEINLINE void GPUEngineBase::_RenderPixel_CheckWindows(const size_t srcX, const GPULayerID srcLayerID, bool &didPassWindowTest, bool &enableColorEffect) const +FORCEINLINE void GPUEngineBase::_RenderPixel_CheckWindows(GPUEngineCompositorInfo &compState, bool &didPassWindowTest, bool &enableColorEffect) const { didPassWindowTest = true; enableColorEffect = true; @@ -1609,10 +1650,10 @@ // Window 0 has the highest priority, so always check this first. if (this->_WIN0_ENABLED) { - if (this->_curr_win[0][srcX] == 1) + if (this->_curr_win[0][compState.xNative] == 1) { //INFO("bg%i passed win0 : (%i %i) was within (%i %i)(%i %i)\n", bgnum, x, gpu->_currentScanline, gpu->WIN0H0, gpu->WIN0V0, gpu->WIN0H1, gpu->WIN0V1); - didPassWindowTest = this->_WIN0_enable[srcLayerID]; + didPassWindowTest = this->_WIN0_enable[compState.selectedLayerID]; enableColorEffect = this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG]; return; } @@ -1621,10 +1662,10 @@ // Window 1 has medium priority, and is checked after Window 0. 
if (this->_WIN1_ENABLED) { - if (this->_curr_win[1][srcX] == 1) + if (this->_curr_win[1][compState.xNative] == 1) { //INFO("bg%i passed win1 : (%i %i) was within (%i %i)(%i %i)\n", bgnum, x, gpu->_currentScanline, gpu->WIN1H0, gpu->WIN1V0, gpu->WIN1H1, gpu->WIN1V1); - didPassWindowTest = this->_WIN1_enable[srcLayerID]; + didPassWindowTest = this->_WIN1_enable[compState.selectedLayerID]; enableColorEffect = this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG]; return; } @@ -1633,9 +1674,9 @@ // Window OBJ has low priority, and is checked after both Window 0 and Window 1. if (this->_WINOBJ_ENABLED) { - if (this->_sprWin[srcX] == 1) + if (this->_sprWin[compState.xNative] == 1) { - didPassWindowTest = this->_WINOBJ_enable[srcLayerID]; + didPassWindowTest = this->_WINOBJ_enable[compState.selectedLayerID]; enableColorEffect = this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG]; return; } @@ -1643,14 +1684,14 @@ // If the pixel isn't inside any windows, then the pixel is outside, and therefore uses the WINOUT flags. // This has the lowest priority, and is always checked last. 
- didPassWindowTest = this->_WINOUT_enable[srcLayerID]; + didPassWindowTest = this->_WINOUT_enable[compState.selectedLayerID]; enableColorEffect = this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG]; } #ifdef ENABLE_SSE2 template <bool ISCUSTOMRENDERINGNEEDED> -FORCEINLINE void GPUEngineBase::_RenderPixel_CheckWindows16_SSE2(const size_t dstX, const GPULayerID srcLayerID, __m128i &didPassWindowTest, __m128i &enableColorEffect) const +FORCEINLINE void GPUEngineBase::_RenderPixel_CheckWindows16_SSE2(GPUEngineCompositorInfo &compState, const size_t dstX, __m128i &didPassWindowTest, __m128i &enableColorEffect) const { didPassWindowTest = _mm_set1_epi8(0xFF); enableColorEffect = _mm_set1_epi8(0xFF); @@ -1699,7 +1740,7 @@ win0HandledMask = _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1)); } - didPassWindowTest = _mm_and_si128(win0HandledMask, this->_WIN0_enable_SSE2[srcLayerID]); + didPassWindowTest = _mm_and_si128(win0HandledMask, this->_WIN0_enable_SSE2[compState.selectedLayerID]); enableColorEffect = _mm_and_si128(win0HandledMask, this->_WIN0_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]); } else @@ -1734,7 +1775,7 @@ win1HandledMask = _mm_andnot_si128(win0HandledMask, _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1))); } - didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(win1HandledMask, this->_WIN1_enable_SSE2[srcLayerID]) ); + didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(win1HandledMask, this->_WIN1_enable_SSE2[compState.selectedLayerID]) ); enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(win1HandledMask, this->_WIN1_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); } @@ -1766,14 +1807,14 @@ } winOBJHandledMask = _mm_andnot_si128( _mm_or_si128(win0HandledMask, win1HandledMask), _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1)) ); - didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(winOBJHandledMask, this->_WINOBJ_enable_SSE2[srcLayerID]) ); + didPassWindowTest = _mm_or_si128( didPassWindowTest, 
_mm_and_si128(winOBJHandledMask, this->_WINOBJ_enable_SSE2[compState.selectedLayerID]) ); enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(winOBJHandledMask, this->_WINOBJ_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); } // If the pixel isn't inside any windows, then the pixel is outside, and therefore uses the WINOUT flags. // This has the lowest priority, and is always checked last. winOUTHandledMask = _mm_xor_si128( _mm_or_si128(win0HandledMask, _mm_or_si128(win1HandledMask, winOBJHandledMask)), _mm_set1_epi32(0xFFFFFFFF) ); - didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(winOUTHandledMask, this->_WINOUT_enable_SSE2[srcLayerID]) ); + didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(winOUTHandledMask, this->_WINOUT_enable_SSE2[compState.selectedLayerID]) ); enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(winOUTHandledMask, this->_WINOUT_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); } @@ -1783,10 +1824,11 @@ // PIXEL RENDERING /*****************************************************************************/ template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> -FORCEINLINE void GPUEngineBase::_RenderPixel(const size_t srcX, const u16 srcColor16, const u8 srcAlpha, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine) +FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compState, const u16 srcColor16, const u8 srcAlpha) { - u16 &dstColor16 = *(u16 *)dstColorLine; - FragmentColor &dstColor32 = *(FragmentColor *)dstColorLine; + u16 &dstColor16 = *compState.lineColorTarget16; + FragmentColor &dstColor32 = *compState.lineColorTarget32; + u8 &dstLayerID = *compState.lineLayerIDTarget; if (ISDEBUGRENDER) { @@ -1807,7 +1849,6 @@ break; } - *dstLayerIDLine = srcLayerID; return; } @@ -1816,7 +1857,7 @@ if (!NOWINDOWSENABLEDHINT) { bool didPassWindowTest; - 
this->_RenderPixel_CheckWindows(srcX, srcLayerID, didPassWindowTest, enableColorEffect); + this->_RenderPixel_CheckWindows(compState, didPassWindowTest, enableColorEffect); if (!didPassWindowTest) { @@ -1841,20 +1882,18 @@ break; } - *dstLayerIDLine = srcLayerID; + dstLayerID = compState.selectedLayerID; return; } ColorEffect selectedEffect = ColorEffect_Disable; - TBlendTable *selectedBlendTable = this->_blendTable; - u8 blendEVA = this->_BLDALPHA_EVA; - u8 blendEVB = this->_BLDALPHA_EVB; + TBlendTable *selectedBlendTable = compState.blendTable555; + u8 blendEVA = compState.blendEVA; + u8 blendEVB = compState.blendEVB; if (enableColorEffect) { - const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; - const GPULayerID dstLayerID = (GPULayerID)*dstLayerIDLine; - const bool dstEffectEnable = (dstLayerID != srcLayerID) && this->_dstBlendEnable[dstLayerID]; + const bool dstEffectEnable = (dstLayerID != compState.selectedLayerID) && this->_dstBlendEnable[dstLayerID]; // Select the color effect based on the BLDCNT target flags. bool forceBlendEffect = false; @@ -1862,7 +1901,7 @@ if (ISSRCLAYEROBJ) { //translucent-capable OBJ are forcing the function to blend when the second target is satisfied - const OBJMode objMode = (OBJMode)this->_sprType[srcX]; + const OBJMode objMode = (OBJMode)this->_sprType[compState.xNative]; const bool isObjTranslucentType = (objMode == OBJMode_Transparent) || (objMode == OBJMode_Bitmap); if (isObjTranslucentType && dstEffectEnable) { @@ -1883,14 +1922,14 @@ { selectedEffect = ColorEffect_Blend; } - else if (this->_srcBlendEnable[srcLayerID]) + else if (this->_srcBlendEnable[compState.selectedLayerID]) { - switch ((ColorEffect)BLDCNT.ColorEffect) + switch (compState.colorEffect) { // For the Blend effect, both first and second target flags must be checked. 
case ColorEffect_Blend: { - if (dstEffectEnable) selectedEffect = (ColorEffect)BLDCNT.ColorEffect; + if (dstEffectEnable) selectedEffect = compState.colorEffect; break; } @@ -1898,7 +1937,7 @@ // Test case: Bomberman Land Touch! dialog boxes will render too dark without this check. case ColorEffect_IncreaseBrightness: case ColorEffect_DecreaseBrightness: - selectedEffect = (ColorEffect)BLDCNT.ColorEffect; + selectedEffect = compState.colorEffect; break; default: @@ -1915,7 +1954,8 @@ switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - dstColor16 = srcColor16 | 0x8000; + dstColor16 = srcColor16; + dstColor16 |= 0x8000; break; case NDSColorFormat_BGR666_Rev: @@ -1931,20 +1971,21 @@ case ColorEffect_IncreaseBrightness: { - const u16 finalDstColor16 = this->_ColorEffectIncreaseBrightness(srcColor16 & 0x7FFF); - switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - dstColor16 = finalDstColor16 | 0x8000; + dstColor16 = compState.brightnessUpTable555[srcColor16 & 0x7FFF]; + dstColor16 |= 0x8000; break; case NDSColorFormat_BGR666_Rev: - dstColor32.color = ConvertColor555To6665Opaque<false>(finalDstColor16); + dstColor32 = compState.brightnessUpTable666[srcColor16 & 0x7FFF]; + dstColor32.a = 0x1F; break; case NDSColorFormat_BGR888_Rev: - dstColor32.color = ConvertColor555To8888Opaque<false>(finalDstColor16); + dstColor32 = compState.brightnessUpTable888[srcColor16 & 0x7FFF]; + dstColor32.a = 0xFF; break; } break; @@ -1952,20 +1993,21 @@ case ColorEffect_DecreaseBrightness: { - const u16 finalDstColor16 = this->_ColorEffectDecreaseBrightness(srcColor16 & 0x7FFF); - switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - dstColor16 = finalDstColor16 | 0x8000; + dstColor16 = compState.brightnessDownTable555[srcColor16 & 0x7FFF]; + dstColor16 |= 0x8000; break; case NDSColorFormat_BGR666_Rev: - dstColor32.color = ConvertColor555To6665Opaque<false>(finalDstColor16); + dstColor32 = compState.brightnessDownTable666[srcColor16 & 0x7FFF]; + dstColor32.a = 0x1F; break; 
case NDSColorFormat_BGR888_Rev: - dstColor32.color = ConvertColor555To8888Opaque<false>(finalDstColor16); + dstColor32 = compState.brightnessDownTable888[srcColor16 & 0x7FFF]; + dstColor32.a = 0xFF; break; } break; @@ -1974,7 +2016,6 @@ case ColorEffect_Blend: { FragmentColor srcColor32; - srcColor32.color = ConvertColor555To6665Opaque<false>(srcColor16); switch (OUTPUTFORMAT) { @@ -1984,11 +2025,13 @@ break; case NDSColorFormat_BGR666_Rev: + srcColor32.color = ConvertColor555To6665Opaque<false>(srcColor16); dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB); dstColor32.a = 0x1F; break; case NDSColorFormat_BGR888_Rev: + srcColor32.color = ConvertColor555To8888Opaque<false>(srcColor16); dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB); dstColor32.a = 0xFF; break; @@ -1997,15 +2040,14 @@ } } - *dstLayerIDLine = srcLayerID; + dstLayerID = compState.selectedLayerID; } #ifdef ENABLE_SSE2 template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> -FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(const size_t dstX, - const ColorEffect colorEffect, - const GPULayerID srcLayerID, +FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(GPUEngineCompositorInfo &compState, + const size_t dstX, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, const __m128i &srcAlpha, const __m128i &srcEffectEnableMask, @@ -2013,7 +2055,7 @@ __m128i &dstLayerID, __m128i &passMask8) { - const __m128i srcLayerID_vec128 = _mm_set1_epi8(srcLayerID); + const __m128i srcLayerID_vec128 = _mm_set1_epi8(compState.selectedLayerID); __m128i passMask16[2] = { _mm_unpacklo_epi8(passMask8, passMask8), _mm_unpackhi_epi8(passMask8, passMask8) }; @@ -2041,7 +2083,6 @@ dst3 = _mm_blendv_epi8(dst3, _mm_or_si128(src3, alphaBits), passMask32[3]); } - dstLayerID = 
_mm_blendv_epi8(dstLayerID, srcLayerID_vec128, passMask8); return; } @@ -2051,7 +2092,7 @@ { // Do the window test. __m128i didPassWindowTest; - this->_RenderPixel_CheckWindows16_SSE2<ISCUSTOMRENDERINGNEEDED>(dstX, srcLayerID, didPassWindowTest, enableColorEffectMask); + this->_RenderPixel_CheckWindows16_SSE2<ISCUSTOMRENDERINGNEEDED>(compState, dstX, didPassWindowTest, enableColorEffectMask); passMask8 = _mm_and_si128(passMask8, didPassWindowTest); passMask16[0] = _mm_unpacklo_epi8(passMask8, passMask8); @@ -2097,11 +2138,11 @@ // Select the color effect based on the BLDCNT target flags. __m128i forceBlendEffectMask = _mm_setzero_si128(); - const __m128i colorEffect_vec128 = (NOWINDOWSENABLEDHINT) ? _mm_set1_epi8(colorEffect) : _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(colorEffect), enableColorEffectMask); + const __m128i colorEffect_vec128 = (NOWINDOWSENABLEDHINT) ? _mm_set1_epi8(compState.colorEffect) : _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(compState.colorEffect), enableColorEffectMask); - __m128i eva_vec128 = _mm_set1_epi16(this->_BLDALPHA_EVA); - __m128i evb_vec128 = _mm_set1_epi16(this->_BLDALPHA_EVB); - const __m128i evy_vec128 = _mm_set1_epi16(this->_BLDALPHA_EVY); + __m128i eva_vec128 = _mm_set1_epi16(compState.blendEVA); + __m128i evb_vec128 = _mm_set1_epi16(compState.blendEVB); + const __m128i evy_vec128 = _mm_set1_epi16(compState.blendEVY); if (ISSRCLAYEROBJ) { @@ -2117,7 +2158,7 @@ __m128i tmpSrc[4] = {src0, src1, src2, src3}; - switch (colorEffect) + switch (compState.colorEffect) { case ColorEffect_IncreaseBrightness: { @@ -2230,14 +2271,16 @@ // However, GPUEngineBase::_RenderPixel() takes source pixels in RGB555. In order to unify the methods, all pixels // must be processed in RGBA6665. 
template<NDSColorFormat OUTPUTFORMAT> -FORCEINLINE void GPUEngineBase::_RenderPixel3D(const bool enableColorEffect, const FragmentColor srcColor32, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine) +FORCEINLINE void GPUEngineBase::_RenderPixel3D(GPUEngineCompositorInfo &compState, const bool enableColorEffect, const FragmentColor srcColor32) { + u16 &dstColor16 = *compState.lineColorTarget16; + FragmentColor &dstColor32 = *compState.lineColorTarget32; + u8 &dstLayerID = *compState.lineLayerIDTarget; ColorEffect selectedEffect = ColorEffect_Disable; if (enableColorEffect) { - const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; - const bool dstEffectEnable = (*dstLayerIDLine != GPULayerID_BG0) && this->_dstBlendEnable[*dstLayerIDLine]; + const bool dstEffectEnable = (dstLayerID != GPULayerID_BG0) && this->_dstBlendEnable[dstLayerID]; // Select the color effect based on the BLDCNT target flags. bool forceBlendEffect = false; @@ -2253,12 +2296,12 @@ } else if (this->_srcBlendEnable[GPULayerID_BG0]) { - switch ((ColorEffect)BLDCNT.ColorEffect) + switch (compState.colorEffect) { // For the Blend effect, both first and second target flags must be checked. case ColorEffect_Blend: { - if (dstEffectEnable) selectedEffect = (ColorEffect)BLDCNT.ColorEffect; + if (dstEffectEnable) selectedEffect = compState.colorEffect; break; } @@ -2266,7 +2309,7 @@ // Test case: Bomberman Land Touch! dialog boxes will render too dark without this check. 
case ColorEffect_IncreaseBrightness: case ColorEffect_DecreaseBrightness: - selectedEffect = (ColorEffect)BLDCNT.ColorEffect; + selectedEffect = compState.colorEffect; break; default: @@ -2279,7 +2322,6 @@ if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { const u16 srcColor16 = ConvertColor6665To5551<false>(srcColor32); - u16 &dstColor16 = *(u16 *)dstColorLine; switch (selectedEffect) { @@ -2288,11 +2330,11 @@ break; case ColorEffect_IncreaseBrightness: - dstColor16 = this->_ColorEffectIncreaseBrightness(srcColor16); + dstColor16 = compState.brightnessUpTable555[srcColor16]; break; case ColorEffect_DecreaseBrightness: - dstColor16 = this->_ColorEffectDecreaseBrightness(srcColor16); + dstColor16 = compState.brightnessDownTable555[srcColor16]; break; case ColorEffect_Blend: @@ -2304,8 +2346,6 @@ } else { - FragmentColor &dstColor32 = *(FragmentColor *)dstColorLine; - switch (selectedEffect) { case ColorEffect_Disable: @@ -2313,11 +2353,11 @@ break; case ColorEffect_IncreaseBrightness: - dstColor32 = this->_ColorEffectIncreaseBrightness<OUTPUTFORMAT>(srcColor32, this->_BLDALPHA_EVY); + dstColor32 = this->_ColorEffectIncreaseBrightness<OUTPUTFORMAT>(srcColor32, compState.blendEVY); break; case ColorEffect_DecreaseBrightness: - dstColor32 = this->_ColorEffectDecreaseBrightness(srcColor32, this->_BLDALPHA_EVY); + dstColor32 = this->_ColorEffectDecreaseBrightness(srcColor32, compState.blendEVY); break; case ColorEffect_Blend: @@ -2328,13 +2368,14 @@ dstColor32.a = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? 
0xFF : 0x1F; } - *dstLayerIDLine = GPULayerID_BG0; + dstLayerID = GPULayerID_BG0; } #ifdef ENABLE_SSE2 template <NDSColorFormat OUTPUTFORMAT> -FORCEINLINE void GPUEngineBase::_RenderPixel3D_SSE2(const __m128i &passMask8, +FORCEINLINE void GPUEngineBase::_RenderPixel3D_SSE2(GPUEngineCompositorInfo &compState, + const __m128i &passMask8, const __m128i &enableColorEffectMask, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, @@ -2383,7 +2424,6 @@ tmpSrc[3] = src3; } - const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[GPULayerID_BG0]; __m128i dstEffectEnableMask; @@ -2402,11 +2442,11 @@ dstEffectEnableMask = _mm_andnot_si128( _mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), dstEffectEnableMask ); // Select the color effect based on the BLDCNT target flags. - const __m128i colorEffect_vec128 = _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(BLDCNT.ColorEffect), enableColorEffectMask); + const __m128i colorEffect_vec128 = _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(compState.colorEffect), enableColorEffectMask); const __m128i forceBlendEffectMask = _mm_and_si128(enableColorEffectMask, dstEffectEnableMask); - const __m128i evy_vec128 = _mm_set1_epi16(this->_BLDALPHA_EVY); + const __m128i evy_vec128 = _mm_set1_epi16(compState.blendEVY); - switch (BLDCNT.ColorEffect) + switch (compState.colorEffect) { case ColorEffect_IncreaseBrightness: { @@ -2517,7 +2557,7 @@ //this is fantastically inaccurate. 
//we do the early return even though it reduces the resulting accuracy //because we need the speed, and because it is inaccurate anyway -void GPUEngineBase::_MosaicSpriteLinePixel(const size_t x, u16 l, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab) +void GPUEngineBase::_MosaicSpriteLinePixel(GPUEngineCompositorInfo &compState, const size_t x, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab) { const bool enableMosaic = (this->_oamList[this->_sprNum[x]].Mosaic != 0); if (!enableMosaic) @@ -2530,7 +2570,7 @@ objColor.alpha = dst_alpha[x]; objColor.opaque = opaque; - const size_t y = l; + const size_t y = compState.lineIndexNative; if (!this->_mosaicWidthOBJ[x].begin || !this->_mosaicHeightOBJ[y].begin) { @@ -2544,7 +2584,7 @@ if (!objColor.opaque) prioTab[x] = 0x7F; } -void GPUEngineBase::_MosaicSpriteLine(u16 l, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab) +void GPUEngineBase::_MosaicSpriteLine(GPUEngineCompositorInfo &compState, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab) { if (!this->_isOBJMosaicSet) { @@ -2553,18 +2593,18 @@ for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++) { - this->_MosaicSpriteLinePixel(i, l, dst, dst_alpha, typeTab, prioTab); + this->_MosaicSpriteLinePixel(compState, i, dst, dst_alpha, typeTab, prioTab); } } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> -void GPUEngineBase::_RenderPixelIterate_Final(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) +void GPUEngineBase::_RenderPixelIterate_Final(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const 
u32 tile, const u16 *__restrict pal) { - const u16 lineWidth = (ISDEBUGRENDER) ? this->_BGLayer[srcLayerID].size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; + const u16 lineWidth = (ISDEBUGRENDER) ? compState.selectedBGLayer->size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; const s16 dx = (s16)LOCAL_TO_LE_16(param.BGnPA.value); const s16 dy = (s16)LOCAL_TO_LE_16(param.BGnPC.value); - const s32 wh = this->_BGLayer[srcLayerID].size.width; - const s32 ht = this->_BGLayer[srcLayerID].size.height; + const s32 wh = compState.selectedBGLayer->size.width; + const s32 ht = compState.selectedBGLayer->size.height; const s32 wmask = wh - 1; const s32 hmask = ht - 1; @@ -2604,7 +2644,7 @@ } else { - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, i, srcColor, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, i, srcColor, (index != 0)); } auxX++; @@ -2635,28 +2675,28 @@ } else { - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, i, srcColor, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, i, srcColor, (index != 0)); } } } } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> -void GPUEngineBase::_RenderPixelIterate_ApplyWrap(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) +void GPUEngineBase::_RenderPixelIterate_ApplyWrap(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, 
const u32 tile, const u16 *__restrict pal) { - this->_RenderPixelIterate_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, WRAP>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); + this->_RenderPixelIterate_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, WRAP>(compState, param, map, tile, pal); } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> -void GPUEngineBase::_RenderPixelIterate(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) +void GPUEngineBase::_RenderPixelIterate(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { - if (this->_BGLayer[srcLayerID].isDisplayWrapped) + if (compState.selectedBGLayer->isDisplayWrapped) { - this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, true>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); + this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, true>(compState, param, map, tile, pal); } else { - this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, false>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); + this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, false>(compState, param, map, tile, 
pal); } } @@ -2673,10 +2713,16 @@ } template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> -FORCEINLINE void GPUEngineBase::_RenderPixelSingle(const size_t lineIndex, const size_t srcX, u16 srcColor16, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine, const bool opaque) +FORCEINLINE void GPUEngineBase::_RenderPixelSingle(GPUEngineCompositorInfo &compState, const size_t srcX, u16 srcColor16, const bool opaque) { bool willRenderColor = opaque; + compState.xNative = srcX; + compState.xCustom = _gpuDstPitchIndex[srcX]; + compState.lineLayerIDTarget = compState.lineLayerIDHeadNative + srcX; + compState.lineColorTarget16 = (u16 *)compState.lineColorHeadNative + srcX; + compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHeadNative + srcX; + if (MOSAIC) { //due to this early out, we will get incorrect behavior in cases where @@ -2685,29 +2731,26 @@ if (!opaque) srcColor16 = 0xFFFF; else srcColor16 &= 0x7FFF; - if (!this->_mosaicWidthBG[srcX].begin || !this->_mosaicHeightBG[lineIndex].begin) + if (!this->_mosaicWidthBG[srcX].begin || !this->_mosaicHeightBG[compState.lineIndexNative].begin) { - srcColor16 = this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[srcX].trunc]; + srcColor16 = this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[srcX].trunc]; } - this->_mosaicColors.bg[srcLayerID][srcX] = srcColor16; + this->_mosaicColors.bg[compState.selectedLayerID][srcX] = srcColor16; willRenderColor = (srcColor16 != 0xFFFF); } if (willRenderColor) { - this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcX, + this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, srcColor16, - 0, - srcLayerID, - (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLine + srcX) : (void *)((FragmentColor *)dstColorLine + srcX), - dstLayerIDLine + srcX); + 0); } } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> -void GPUEngineBase::_RenderPixelsCustom(const size_t lineIndex, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine) +void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compState) { const size_t lineWidth = GPU->GetDisplayInfo().customWidth; @@ -2729,19 +2772,19 @@ const __m128i tmpColor_vec128 = _mm_blendv_epi8(_mm_and_si128(col_vec128, _mm_set1_epi16(0x7FFF)), _mm_set1_epi16(0xFFFF), idxMask); const __m128i mosaicWidthMask = _mm_cmpeq_epi16( _mm_and_si128(_mm_set1_epi16(0x00FF), _mm_loadu_si128((__m128i *)(this->_mosaicWidthBG + x))), _mm_setzero_si128() ); - const __m128i mosaicHeightMask = _mm_cmpeq_epi16(_mm_set1_epi16(this->_mosaicHeightBG[lineIndex].begin), _mm_setzero_si128()); + const __m128i mosaicHeightMask = _mm_cmpeq_epi16(_mm_set1_epi16(this->_mosaicHeightBG[compState.lineIndexNative].begin), _mm_setzero_si128()); const __m128i mosaicMask = _mm_or_si128(mosaicWidthMask, mosaicHeightMask); - this->_mosaicColors.bg[srcLayerID][x+0] = (_mm_extract_epi16(mosaicMask, 0) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+0].trunc] : _mm_extract_epi16(tmpColor_vec128, 0); - this->_mosaicColors.bg[srcLayerID][x+1] = (_mm_extract_epi16(mosaicMask, 1) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+1].trunc] : _mm_extract_epi16(tmpColor_vec128, 1); - this->_mosaicColors.bg[srcLayerID][x+2] = (_mm_extract_epi16(mosaicMask, 2) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+2].trunc] : _mm_extract_epi16(tmpColor_vec128, 2); - this->_mosaicColors.bg[srcLayerID][x+3] = (_mm_extract_epi16(mosaicMask, 3) != 0) ? 
this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+3].trunc] : _mm_extract_epi16(tmpColor_vec128, 3); - this->_mosaicColors.bg[srcLayerID][x+4] = (_mm_extract_epi16(mosaicMask, 4) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+4].trunc] : _mm_extract_epi16(tmpColor_vec128, 4); - this->_mosaicColors.bg[srcLayerID][x+5] = (_mm_extract_epi16(mosaicMask, 5) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+5].trunc] : _mm_extract_epi16(tmpColor_vec128, 5); - this->_mosaicColors.bg[srcLayerID][x+6] = (_mm_extract_epi16(mosaicMask, 6) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+6].trunc] : _mm_extract_epi16(tmpColor_vec128, 6); - this->_mosaicColors.bg[srcLayerID][x+7] = (_mm_extract_epi16(mosaicMask, 7) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+7].trunc] : _mm_extract_epi16(tmpColor_vec128, 7); + this->_mosaicColors.bg[compState.selectedLayerID][x+0] = (_mm_extract_epi16(mosaicMask, 0) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+0].trunc] : _mm_extract_epi16(tmpColor_vec128, 0); + this->_mosaicColors.bg[compState.selectedLayerID][x+1] = (_mm_extract_epi16(mosaicMask, 1) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+1].trunc] : _mm_extract_epi16(tmpColor_vec128, 1); + this->_mosaicColors.bg[compState.selectedLayerID][x+2] = (_mm_extract_epi16(mosaicMask, 2) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+2].trunc] : _mm_extract_epi16(tmpColor_vec128, 2); + this->_mosaicColors.bg[compState.selectedLayerID][x+3] = (_mm_extract_epi16(mosaicMask, 3) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+3].trunc] : _mm_extract_epi16(tmpColor_vec128, 3); + this->_mosaicColors.bg[compState.selectedLayerID][x+4] = (_mm_extract_epi16(mosaicMask, 4) != 0) ? 
this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+4].trunc] : _mm_extract_epi16(tmpColor_vec128, 4); + this->_mosaicColors.bg[compState.selectedLayerID][x+5] = (_mm_extract_epi16(mosaicMask, 5) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+5].trunc] : _mm_extract_epi16(tmpColor_vec128, 5); + this->_mosaicColors.bg[compState.selectedLayerID][x+6] = (_mm_extract_epi16(mosaicMask, 6) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+6].trunc] : _mm_extract_epi16(tmpColor_vec128, 6); + this->_mosaicColors.bg[compState.selectedLayerID][x+7] = (_mm_extract_epi16(mosaicMask, 7) != 0) ? this->_mosaicColors.bg[compState.selectedLayerID][this->_mosaicWidthBG[x+7].trunc] : _mm_extract_epi16(tmpColor_vec128, 7); - const __m128i mosaicColor_vec128 = _mm_loadu_si128((__m128i *)(this->_mosaicColors.bg[srcLayerID] + x)); + const __m128i mosaicColor_vec128 = _mm_loadu_si128((__m128i *)(this->_mosaicColors.bg[compState.selectedLayerID] + x)); const __m128i mosaicColorMask = _mm_cmpeq_epi16(mosaicColor_vec128, _mm_set1_epi16(0xFFFF)); _mm_storel_epi64( (__m128i *)(this->_bgLayerIndex + x), _mm_andnot_si128(_mm_packs_epi16(mosaicColorMask, _mm_setzero_si128()), index_vec128) ); _mm_store_si128( (__m128i *)(this->_bgLayerColor + x), _mm_blendv_epi8(mosaicColor_vec128, col_vec128, mosaicColorMask) ); @@ -2809,20 +2852,23 @@ } #endif - const size_t lineCount = _gpuDstLineCount[lineIndex]; + compState.xNative = 0; + compState.xCustom = 0; + compState.lineColorTarget16 = (u16 *)compState.lineColorHead; + compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHead; + compState.lineLayerIDTarget = compState.lineLayerIDHead; #ifdef ENABLE_SSE2 const size_t ssePixCount = (lineWidth - (lineWidth % 16)); - const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; - const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[srcLayerID]; + const __m128i srcEffectEnableMask = 
this->_srcBlendEnable_SSE2[compState.selectedLayerID]; #endif - for (size_t l = 0; l < lineCount; l++) + for (size_t l = 0; l < compState.lineRenderCount; l++) { size_t i = 0; #ifdef ENABLE_SSE2 - for (; i < ssePixCount; i+=16, dstLayerIDLine+=16, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 16) : (void *)((FragmentColor *)dstColorLine + 16)) + for (; i < ssePixCount; i+=16, compState.lineColorTarget16+=16, compState.lineColorTarget32+=16, compState.lineLayerIDTarget+=16) { __m128i src[4]; @@ -2852,12 +2898,12 @@ const __m128i srcAlpha = _mm_setzero_si128(); - __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)dstLayerIDLine); + __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)compState.lineLayerIDTarget); __m128i passMask8 = _mm_xor_si128( _mm_cmpeq_epi8(_mm_load_si128((__m128i *)(this->_bgLayerIndexCustom + i)), _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF) ); __m128i dst[4]; - dst[0] = _mm_load_si128((__m128i *)dstColorLine + 0); - dst[1] = _mm_load_si128((__m128i *)dstColorLine + 1); + dst[0] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 0); + dst[1] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 1); if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { @@ -2866,68 +2912,66 @@ } else { - dst[2] = _mm_load_si128((__m128i *)dstColorLine + 2); - dst[3] = _mm_load_si128((__m128i *)dstColorLine + 3); + dst[2] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 2); + dst[3] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 3); } - this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(i, - (ColorEffect)BLDCNT.ColorEffect, - srcLayerID, + this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(compState, + i, src[3], src[2], src[1], src[0], srcAlpha, srcEffectEnableMask, dst[3], dst[2], dst[1], dst[0], dstLayerID_vec128, passMask8); - _mm_store_si128((__m128i 
*)dstColorLine + 0, dst[0]); - _mm_store_si128((__m128i *)dstColorLine + 1, dst[1]); + _mm_store_si128((__m128i *)*compState.lineColorTarget + 0, dst[0]); + _mm_store_si128((__m128i *)*compState.lineColorTarget + 1, dst[1]); if (OUTPUTFORMAT != NDSColorFormat_BGR555_Rev) { - _mm_store_si128((__m128i *)dstColorLine + 2, dst[2]); - _mm_store_si128((__m128i *)dstColorLine + 3, dst[3]); + _mm_store_si128((__m128i *)*compState.lineColorTarget + 2, dst[2]); + _mm_store_si128((__m128i *)*compState.lineColorTarget + 3, dst[3]); } - _mm_store_si128((__m128i *)dstLayerIDLine, dstLayerID_vec128); + _mm_store_si128((__m128i *)compState.lineLayerIDTarget, dstLayerID_vec128); } #endif #ifdef ENABLE_SSE2 #pragma LOOPVECTORIZE_DISABLE #endif - for (; i < lineWidth; i++, dstLayerIDLine++, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 1) : (void *)((FragmentColor *)dstColorLine + 1)) + for (; i < lineWidth; i++, compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) { if (this->_bgLayerIndexCustom[i] == 0) { continue; } - this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(_gpuDstToSrcIndex[i], + this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, this->_bgLayerColorCustom[i], - 0, - srcLayerID, - dstColorLine, - dstLayerIDLine); + 0); } } } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> -void GPUEngineBase::_RenderPixelsCustomVRAM(const size_t lineIndex, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine) +void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compState) { - const size_t lineWidth = GPU->GetDisplayInfo().customWidth; - const size_t lineCount = 
_gpuDstLineCount[lineIndex]; - const size_t dstPixCount = lineWidth * lineCount; - const u16 *__restrict srcLine = GPU->GetCustomVRAMAddressUsingMappedAddress(this->_BGLayer[srcLayerID].BMPAddress) + (_gpuDstLineIndex[lineIndex] * lineWidth); + const u16 *__restrict srcLine = GPU->GetCustomVRAMAddressUsingMappedAddress(compState.selectedBGLayer->BMPAddress) + compState.blockOffsetCustom; + compState.xNative = 0; + compState.xCustom = 0; + compState.lineColorTarget16 = (u16 *)compState.lineColorHead; + compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHead; + compState.lineLayerIDTarget = compState.lineLayerIDHead; + size_t i = 0; #ifdef ENABLE_SSE2 - const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; - const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[srcLayerID]; + const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[compState.selectedLayerID]; - const size_t ssePixCount = (dstPixCount - (dstPixCount % 16)); - for (; i < ssePixCount; i+=16, dstLayerIDLine+=16, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLine + 16) : (void *)((FragmentColor *)dstColorLine + 16)) + const size_t ssePixCount = (compState.linePixelCount - (compState.linePixelCount % 16)); + for (; i < ssePixCount; i+=16, compState.lineColorTarget16+=16, compState.lineColorTarget32+=16, compState.lineLayerIDTarget+=16) { const __m128i src16[2] = { _mm_load_si128((__m128i *)(srcLine + i + 0)), _mm_load_si128((__m128i *)(srcLine + i + 8)) }; @@ -2956,13 +3000,13 @@ const __m128i srcAlpha = _mm_setzero_si128(); - __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)(dstLayerIDLine + i)); + __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)compState.lineLayerIDTarget); __m128i passMask8 = _mm_packs_epi16( _mm_srli_epi16(src16[0], 15), _mm_srli_epi16(src16[1], 15) ); passMask8 = _mm_cmpeq_epi8(passMask8, _mm_set1_epi8(1)); __m128i dst[4]; - dst[0] = _mm_load_si128((__m128i *)dstColorLine + 0); - dst[1] = _mm_load_si128((__m128i *)dstColorLine + 1); + dst[0] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 0); + dst[1] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 1); if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { @@ -2971,48 +3015,44 @@ } else { - dst[2] = _mm_load_si128((__m128i *)dstColorLine + 2); - dst[3] = _mm_load_si128((__m128i *)dstColorLine + 3); + dst[2] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 2); + dst[3] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 3); } - this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(i, - (ColorEffect)BLDCNT.ColorEffect, - srcLayerID, + this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(compState, + i, src[3], src[2], src[1], src[0], srcAlpha, srcEffectEnableMask, dst[3], dst[2], dst[1], dst[0], dstLayerID_vec128, passMask8); - _mm_store_si128((__m128i *)dstColorLine + 0, dst[0]); - _mm_store_si128((__m128i *)dstColorLine + 1, dst[1]); + _mm_store_si128((__m128i 
*)*compState.lineColorTarget + 0, dst[0]); + _mm_store_si128((__m128i *)*compState.lineColorTarget + 1, dst[1]); if (OUTPUTFORMAT != NDSColorFormat_BGR555_Rev) { - _mm_store_si128((__m128i *)dstColorLine + 2, dst[2]); - _mm_store_si128((__m128i *)dstColorLine + 3, dst[3]); + _mm_store_si128((__m128i *)*compState.lineColorTarget + 2, dst[2]); + _mm_store_si128((__m128i *)*compState.lineColorTarget + 3, dst[3]); } - _mm_store_si128((__m128i *)dstLayerIDLine, dstLayerID_vec128); + _mm_store_si128((__m128i *)compState.lineLayerIDTarget, dstLayerID_vec128); } #endif #ifdef ENABLE_SSE2 #pragma LOOPVECTORIZE_DISABLE #endif - for (; i < dstPixCount; i++, dstLayerIDLine++, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 1) : (void *)((FragmentColor *)dstColorLine + 1)) + for (; i < compState.linePixelCount; i++, compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) { if ((srcLine[i] & 0x8000) == 0) { continue; } - this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(_gpuDstToSrcIndex[i], + this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, srcLine[i], - 0, - srcLayerID, - dstColorLine, - dstLayerIDLine); + 0); } } @@ -3021,13 +3061,13 @@ /*****************************************************************************/ // render a text background to the combined pixelbuffer template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> -void GPUEngineBase::_RenderLine_BGText(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const u16 XBG, const u16 YBG) +void GPUEngineBase::_RenderLine_BGText(GPUEngineCompositorInfo &compState, const u16 XBG, const u16 YBG) { const IOREG_DISPCNT 
&DISPCNT = this->_IORegisterMap->DISPCNT; - const u16 lineWidth = (ISDEBUGRENDER) ? this->_BGLayer[srcLayerID].size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; - const u16 lg = this->_BGLayer[srcLayerID].size.width; - const u16 ht = this->_BGLayer[srcLayerID].size.height; - const u32 tile = this->_BGLayer[srcLayerID].tileEntryAddress; + const u16 lineWidth = (ISDEBUGRENDER) ? compState.selectedBGLayer->size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; + const u16 lg = compState.selectedBGLayer->size.width; + const u16 ht = compState.selectedBGLayer->size.height; + const u32 tile = compState.selectedBGLayer->tileEntryAddress; const u16 wmask = lg - 1; const u16 hmask = ht - 1; @@ -3036,11 +3076,11 @@ size_t xoff = XBG; const u16 tmp = (YBG & hmask) >> 3; - u32 map = this->_BGLayer[srcLayerID].tileMapAddress + (tmp & 31) * 64; + u32 map = compState.selectedBGLayer->tileMapAddress + (tmp & 31) * 64; if (tmp > 31) - map += ADDRESS_STEP_512B << this->_BGLayer[srcLayerID].BGnCNT.ScreenSize; + map += ADDRESS_STEP_512B << compState.selectedBGLayer->BGnCNT.ScreenSize; - if (this->_BGLayer[srcLayerID].BGnCNT.PaletteMode == PaletteMode_16x16) // color: 16 palette entries + if (compState.selectedBGLayer->BGnCNT.PaletteMode == PaletteMode_16x16) // color: 16 palette entries { const u16 *__restrict pal = this->_paletteBG; const u16 yoff = (YBG & 0x0007) << 2; @@ -3068,7 +3108,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -3087,7 +3127,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, 
NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -3104,7 +3144,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -3127,7 +3167,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -3146,7 +3186,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -3163,7 +3203,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); + 
this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } x++; @@ -3175,7 +3215,7 @@ } else //256-color BG { - const u16 *__restrict pal = (DISPCNT.ExBGxPalette_Enable) ? *(this->_BGLayer[srcLayerID].extPalette) : this->_paletteBG; + const u16 *__restrict pal = (DISPCNT.ExBGxPalette_Enable) ? *(compState.selectedBGLayer->extPalette) : this->_paletteBG; const u32 extPalMask = -DISPCNT.ExBGxPalette_Enable; const u16 yoff = (YBG & 0x0007) << 3; size_t line_dir; @@ -3208,7 +3248,7 @@ { const u8 index = *tileColorIdx; const u16 color = LE_TO_LOCAL_16(tilePal[index]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(compState, x, color, (index != 0)); } } } @@ -3216,40 +3256,40 @@ } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> -void GPUEngineBase::_RenderLine_BGAffine(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param) +void GPUEngineBase::_RenderLine_BGAffine(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param) { - this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_8bit_entry>(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].tileMapAddress, this->_BGLayer[srcLayerID].tileEntryAddress, this->_paletteBG); + this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_8bit_entry>(compState, param, compState.selectedBGLayer->tileMapAddress, 
compState.selectedBGLayer->tileEntryAddress, this->_paletteBG); } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> -void* GPUEngineBase::_RenderLine_BGExtended(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, bool &outUseCustomVRAM) +void GPUEngineBase::_RenderLine_BGExtended(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter &param, bool &outUseCustomVRAM) { const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; - switch (this->_BGLayer[srcLayerID].type) + switch (compState.selectedBGLayer->type) { case BGType_AffineExt_256x16: // 16 bit bgmap entries { if (DISPCNT.ExBGxPalette_Enable) { - this->_RenderPixelIterate< OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry<true> >(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].tileMapAddress, this->_BGLayer[srcLayerID].tileEntryAddress, *(this->_BGLayer[srcLayerID].extPalette)); + this->_RenderPixelIterate< OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry<true> >(compState, param, comp... [truncated message content] |
From: <rog...@us...> - 2016-07-22 18:32:32
|
Revision: 5511 http://sourceforge.net/p/desmume/code/5511 Author: rogerman Date: 2016-07-22 18:32:31 +0000 (Fri, 22 Jul 2016) Log Message: ----------- GPU: - Fix bugs with window processing when rendering at the native resolution. (Regression from r5506.) Revision Links: -------------- http://sourceforge.net/p/desmume/code/5506 Modified Paths: -------------- trunk/desmume/src/GPU.cpp Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-20 10:07:53 UTC (rev 5510) +++ trunk/desmume/src/GPU.cpp 2016-07-22 18:32:31 UTC (rev 5511) @@ -4672,7 +4672,7 @@ this->_WIN1_enable[GPULayerID_BG2] = (this->_IORegisterMap->WIN1IN.BG2_Enable != 0); this->_WIN1_enable[GPULayerID_BG3] = (this->_IORegisterMap->WIN1IN.BG3_Enable != 0); this->_WIN1_enable[GPULayerID_OBJ] = (this->_IORegisterMap->WIN1IN.OBJ_Enable != 0); - this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG] = (this->_IORegisterMap->WIN0IN.Effect_Enable != 0); + this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG] = (this->_IORegisterMap->WIN1IN.Effect_Enable != 0); #if defined(ENABLE_SSE2) this->_WIN0_enable_SSE2[GPULayerID_BG0] = _mm_set1_epi8((this->_IORegisterMap->WIN0IN.BG0_Enable != 0) ? 
0xFF : 0x00); @@ -4698,14 +4698,14 @@ this->_WINOUT_enable[GPULayerID_BG2] = (this->_IORegisterMap->WINOUT.BG2_Enable != 0); this->_WINOUT_enable[GPULayerID_BG3] = (this->_IORegisterMap->WINOUT.BG3_Enable != 0); this->_WINOUT_enable[GPULayerID_OBJ] = (this->_IORegisterMap->WINOUT.OBJ_Enable != 0); - this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG] = (this->_IORegisterMap->WIN0IN.Effect_Enable != 0); + this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG] = (this->_IORegisterMap->WINOUT.Effect_Enable != 0); this->_WINOBJ_enable[GPULayerID_BG0] = (this->_IORegisterMap->WINOBJ.BG0_Enable != 0); this->_WINOBJ_enable[GPULayerID_BG1] = (this->_IORegisterMap->WINOBJ.BG1_Enable != 0); this->_WINOBJ_enable[GPULayerID_BG2] = (this->_IORegisterMap->WINOBJ.BG2_Enable != 0); this->_WINOBJ_enable[GPULayerID_BG3] = (this->_IORegisterMap->WINOBJ.BG3_Enable != 0); this->_WINOBJ_enable[GPULayerID_OBJ] = (this->_IORegisterMap->WINOBJ.OBJ_Enable != 0); - this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG] = (this->_IORegisterMap->WIN0IN.Effect_Enable != 0); + this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG] = (this->_IORegisterMap->WINOBJ.Effect_Enable != 0); #if defined(ENABLE_SSE2) this->_WINOUT_enable_SSE2[GPULayerID_BG0] = _mm_set1_epi8((this->_IORegisterMap->WINOUT.BG0_Enable != 0) ? 0xFF : 0x00); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-07-20 10:07:56
|
Revision: 5510 http://sourceforge.net/p/desmume/code/5510 Author: rogerman Date: 2016-07-20 10:07:53 +0000 (Wed, 20 Jul 2016) Log Message: ----------- Cocoa Port: - In the OpenGL blitter, replace some calls to glBufferSubDataARB() with glMapBufferARB(). This, maybe, possibly, fixes an intermittent crash that can occur with the Intel HD Graphics 3000 OpenGL driver. Modified Paths: -------------- trunk/desmume/src/cocoa/OGLDisplayOutput.cpp trunk/desmume/src/cocoa/OGLDisplayOutput.h Modified: trunk/desmume/src/cocoa/OGLDisplayOutput.cpp =================================================================== --- trunk/desmume/src/cocoa/OGLDisplayOutput.cpp 2016-07-19 06:40:53 UTC (rev 5509) +++ trunk/desmume/src/cocoa/OGLDisplayOutput.cpp 2016-07-20 10:07:53 UTC (rev 5510) @@ -6737,7 +6737,7 @@ _isVisible = true; _output = oglVO; _useClientStorage = GL_FALSE; - _needUploadVertices = true; + _needUpdateVertices = true; _useDeposterize = false; _displayWidth = GPU_DISPLAY_WIDTH; @@ -6762,10 +6762,6 @@ _displayTexFilter[0] = GL_NEAREST; _displayTexFilter[1] = GL_NEAREST; - _vtxBufferOffset = 0; - UpdateVertices(); - UpdateTexCoords(_vf[0]->GetDstWidth(), _vf[0]->GetDstHeight(), _vf[1]->GetDstWidth(), _vf[1]->GetDstHeight()); - _isTexVideoInputDataNative[0] = true; _isTexVideoInputDataNative[1] = true; _texLoadedWidth[0] = (GLfloat)GPU_DISPLAY_WIDTH; @@ -6841,9 +6837,9 @@ glGenBuffersARB(1, &_vboElementID); glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboVertexID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(GLint) * (2 * 8), vtxBuffer, GL_STATIC_DRAW_ARB); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(GLint) * (2 * 8), NULL, GL_STATIC_DRAW_ARB); glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboTexCoordID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(GLfloat) * (2 * 8), texCoordBuffer, GL_STATIC_DRAW_ARB); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(GLfloat) * (2 * 8), NULL, GL_STREAM_DRAW_ARB); glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 
_vboElementID); @@ -7149,7 +7145,7 @@ this->_displayWidth = w; this->_displayHeight = h; this->GetNormalSize(this->_normalWidth, this->_normalHeight); - this->UpdateVertices(); + this->_needUpdateVertices = true; } int OGLDisplayLayer::GetMode() @@ -7161,7 +7157,7 @@ { this->_displayMode = dispMode; this->GetNormalSize(this->_normalWidth, this->_normalHeight); - this->UpdateVertices(); + this->_needUpdateVertices = true; } int OGLDisplayLayer::GetOrientation() @@ -7173,7 +7169,7 @@ { this->_displayOrientation = dispOrientation; this->GetNormalSize(this->_normalWidth, this->_normalHeight); - this->UpdateVertices(); + this->_needUpdateVertices = true; } GLfloat OGLDisplayLayer::GetGapScalar() @@ -7185,7 +7181,7 @@ { this->_gapScalar = theScalar; this->GetNormalSize(this->_normalWidth, this->_normalHeight); - this->UpdateVertices(); + this->_needUpdateVertices = true; } GLfloat OGLDisplayLayer::GetRotation() @@ -7216,82 +7212,61 @@ void OGLDisplayLayer::SetOrder(int dispOrder) { this->_displayOrder = dispOrder; - - if (this->_displayOrder == DS_DISPLAY_ORDER_MAIN_FIRST) - { - this->_vtxBufferOffset = 0; - } - else // dispOrder == DS_DISPLAY_ORDER_TOUCH_FIRST - { - this->_vtxBufferOffset = (2 * 8); - } - - this->_needUploadVertices = true; + this->_needUpdateVertices = true; } -void OGLDisplayLayer::UpdateVertices() +void OGLDisplayLayer::UpdateVerticesOGL() { + const size_t f = (this->_displayOrder == DS_DISPLAY_ORDER_MAIN_FIRST) ? 
0 : 8; const GLfloat w = this->_displayWidth; const GLfloat h = this->_displayHeight; const GLfloat gap = (h * DS_DISPLAY_VERTICAL_GAP_TO_HEIGHT_RATIO) * this->_gapScalar / 2.0; + glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->_vboVertexID); + GLint *vtxBufferPtr = (GLint *)glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + if (this->_displayMode == DS_DISPLAY_TYPE_DUAL) { // displayOrder == DS_DISPLAY_ORDER_MAIN_FIRST if (this->_displayOrientation == DS_DISPLAY_ORIENTATION_VERTICAL) { - vtxBuffer[0] = -w/2; vtxBuffer[1] = h+gap; // Top display, top left - vtxBuffer[2] = w/2; vtxBuffer[3] = h+gap; // Top display, top right - vtxBuffer[4] = w/2; vtxBuffer[5] = gap; // Top display, bottom right - vtxBuffer[6] = -w/2; vtxBuffer[7] = gap; // Top display, bottom left + vtxBufferPtr[0+f] = -w/2; vtxBufferPtr[1+f] = h+gap; // Top display, top left + vtxBufferPtr[2+f] = w/2; vtxBufferPtr[3+f] = h+gap; // Top display, top right + vtxBufferPtr[4+f] = w/2; vtxBufferPtr[5+f] = gap; // Top display, bottom right + vtxBufferPtr[6+f] = -w/2; vtxBufferPtr[7+f] = gap; // Top display, bottom left - vtxBuffer[8] = -w/2; vtxBuffer[9] = -gap; // Bottom display, top left - vtxBuffer[10] = w/2; vtxBuffer[11] = -gap; // Bottom display, top right - vtxBuffer[12] = w/2; vtxBuffer[13] = -(h+gap); // Bottom display, bottom right - vtxBuffer[14] = -w/2; vtxBuffer[15] = -(h+gap); // Bottom display, bottom left + vtxBufferPtr[8-f] = -w/2; vtxBufferPtr[9-f] = -gap; // Bottom display, top left + vtxBufferPtr[10-f] = w/2; vtxBufferPtr[11-f] = -gap; // Bottom display, top right + vtxBufferPtr[12-f] = w/2; vtxBufferPtr[13-f] = -(h+gap); // Bottom display, bottom right + vtxBufferPtr[14-f] = -w/2; vtxBufferPtr[15-f] = -(h+gap); // Bottom display, bottom left } else // displayOrientationID == DS_DISPLAY_ORIENTATION_HORIZONTAL { - vtxBuffer[0] = -(w+gap); vtxBuffer[1] = h/2; // Left display, top left - vtxBuffer[2] = -gap; vtxBuffer[3] = h/2; // Left display, top right - vtxBuffer[4] = -gap; 
vtxBuffer[5] = -h/2; // Left display, bottom right - vtxBuffer[6] = -(w+gap); vtxBuffer[7] = -h/2; // Left display, bottom left + vtxBufferPtr[0+f] = -(w+gap); vtxBufferPtr[1+f] = h/2; // Left display, top left + vtxBufferPtr[2+f] = -gap; vtxBufferPtr[3+f] = h/2; // Left display, top right + vtxBufferPtr[4+f] = -gap; vtxBufferPtr[5+f] = -h/2; // Left display, bottom right + vtxBufferPtr[6+f] = -(w+gap); vtxBufferPtr[7+f] = -h/2; // Left display, bottom left - vtxBuffer[8] = gap; vtxBuffer[9] = h/2; // Right display, top left - vtxBuffer[10] = w+gap; vtxBuffer[11] = h/2; // Right display, top right - vtxBuffer[12] = w+gap; vtxBuffer[13] = -h/2; // Right display, bottom right - vtxBuffer[14] = gap; vtxBuffer[15] = -h/2; // Right display, bottom left + vtxBufferPtr[8-f] = gap; vtxBufferPtr[9-f] = h/2; // Right display, top left + vtxBufferPtr[10-f] = w+gap; vtxBufferPtr[11-f] = h/2; // Right display, top right + vtxBufferPtr[12-f] = w+gap; vtxBufferPtr[13-f] = -h/2; // Right display, bottom right + vtxBufferPtr[14-f] = gap; vtxBufferPtr[15-f] = -h/2; // Right display, bottom left } - - // displayOrder == DS_DISPLAY_ORDER_TOUCH_FIRST - memcpy(vtxBuffer + (2 * 8), vtxBuffer + (1 * 8), sizeof(GLint) * (1 * 8)); - memcpy(vtxBuffer + (3 * 8), vtxBuffer + (0 * 8), sizeof(GLint) * (1 * 8)); } else // displayModeID == DS_DISPLAY_TYPE_MAIN || displayModeID == DS_DISPLAY_TYPE_TOUCH { - vtxBuffer[0] = -w/2; vtxBuffer[1] = h/2; // First display, top left - vtxBuffer[2] = w/2; vtxBuffer[3] = h/2; // First display, top right - vtxBuffer[4] = w/2; vtxBuffer[5] = -h/2; // First display, bottom right - vtxBuffer[6] = -w/2; vtxBuffer[7] = -h/2; // First display, bottom left + vtxBufferPtr[0] = -w/2; vtxBufferPtr[1] = h/2; // First display, top left + vtxBufferPtr[2] = w/2; vtxBufferPtr[3] = h/2; // First display, top right + vtxBufferPtr[4] = w/2; vtxBufferPtr[5] = -h/2; // First display, bottom right + vtxBufferPtr[6] = -w/2; vtxBufferPtr[7] = -h/2; // First display, bottom left - 
memcpy(vtxBuffer + (1 * 8), vtxBuffer + (0 * 8), sizeof(GLint) * (1 * 8)); // Second display - memcpy(vtxBuffer + (2 * 8), vtxBuffer + (0 * 8), sizeof(GLint) * (2 * 8)); // Second display + memcpy(vtxBufferPtr + (1 * 8), vtxBufferPtr + (0 * 8), sizeof(GLint) * (1 * 8)); // Second display } - this->_needUploadVertices = true; -} - -void OGLDisplayLayer::UpdateTexCoords(GLfloat s0, GLfloat t0, GLfloat s1, GLfloat t1) -{ - texCoordBuffer[0] = 0.0f; texCoordBuffer[1] = 0.0f; - texCoordBuffer[2] = s0; texCoordBuffer[3] = 0.0f; - texCoordBuffer[4] = s0; texCoordBuffer[5] = t0; - texCoordBuffer[6] = 0.0f; texCoordBuffer[7] = t0; + glUnmapBufferARB(GL_ARRAY_BUFFER_ARB); + glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); - texCoordBuffer[8] = 0.0f; texCoordBuffer[9] = 0.0f; - texCoordBuffer[10] = s1; texCoordBuffer[11] = 0.0f; - texCoordBuffer[12] = s1; texCoordBuffer[13] = t1; - texCoordBuffer[14] = 0.0f; texCoordBuffer[15] = t1; + this->_needUpdateVertices = false; } bool OGLDisplayLayer::CanUseShaderBasedFilters() @@ -7361,21 +7336,6 @@ free(oldMasterBuffer); } -void OGLDisplayLayer::UploadVerticesOGL() -{ - glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->_vboVertexID); - glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(GLint) * (2 * 8), this->vtxBuffer + this->_vtxBufferOffset); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); - this->_needUploadVertices = false; -} - -void OGLDisplayLayer::UploadTexCoordsOGL() -{ - glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->_vboTexCoordID); - glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(GLfloat) * (2 * 8), this->texCoordBuffer); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); -} - void OGLDisplayLayer::UploadTransformationOGL() { const GLdouble w = this->_viewportWidth; @@ -8003,9 +7963,24 @@ this->_texVideoOutputID[0] = texVideoSourceID[0]; this->_texVideoOutputID[1] = texVideoSourceID[1]; - this->UpdateTexCoords(w0, h0, w1, h1); - this->UploadTexCoordsOGL(); + // Update the texture coordinates + glBindBufferARB(GL_ARRAY_BUFFER_ARB, 
this->_vboTexCoordID); + glBufferDataARB(GL_ARRAY_BUFFER_ARB, (2 * 8) * sizeof(GLfloat), NULL, GL_STREAM_DRAW_ARB); + GLfloat *texCoordBufferPtr = (GLfloat *)glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + texCoordBufferPtr[0] = 0.0f; texCoordBufferPtr[1] = 0.0f; + texCoordBufferPtr[2] = w0; texCoordBufferPtr[3] = 0.0f; + texCoordBufferPtr[4] = w0; texCoordBufferPtr[5] = h0; + texCoordBufferPtr[6] = 0.0f; texCoordBufferPtr[7] = h0; + + texCoordBufferPtr[8] = 0.0f; texCoordBufferPtr[9] = 0.0f; + texCoordBufferPtr[10] = w1; texCoordBufferPtr[11] = 0.0f; + texCoordBufferPtr[12] = w1; texCoordBufferPtr[13] = h1; + texCoordBufferPtr[14] = 0.0f; texCoordBufferPtr[15] = h1; + + glUnmapBufferARB(GL_ARRAY_BUFFER_ARB); + glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); } @@ -8014,9 +7989,9 @@ glUseProgram(this->_finalOutputProgram->GetProgramID()); this->UploadTransformationOGL(); - if (this->_needUploadVertices) + if (this->_needUpdateVertices) { - this->UploadVerticesOGL(); + this->UpdateVerticesOGL(); } // Enable vertex attributes Modified: trunk/desmume/src/cocoa/OGLDisplayOutput.h =================================================================== --- trunk/desmume/src/cocoa/OGLDisplayOutput.h 2016-07-19 06:40:53 UTC (rev 5509) +++ trunk/desmume/src/cocoa/OGLDisplayOutput.h 2016-07-20 10:07:53 UTC (rev 5510) @@ -377,7 +377,7 @@ ShaderSupportTier _shaderSupport; GLboolean _useClientStorage; - bool _needUploadVertices; + bool _needUpdateVertices; bool _useDeposterize; bool _useShaderBasedPixelScaler; bool _filtersPreferGPU; @@ -424,10 +424,6 @@ GLuint _texHQ3xLUT; GLuint _texHQ4xLUT; - GLint vtxBuffer[4 * 8]; - GLfloat texCoordBuffer[2 * 8]; - size_t _vtxBufferOffset; - GLuint _vaoMainStatesID; GLuint _vboVertexID; GLuint _vboTexCoordID; @@ -441,12 +437,9 @@ void DetermineTextureStorageHints(GLint &videoSrcTexStorageHint, GLint &cpuFilterTexStorageHint); void ResizeCPUPixelScalerOGL(const size_t srcWidthMain, const size_t 
srcHeightMain, const size_t srcWidthTouch, const size_t srcHeightTouch, const size_t scaleMultiply, const size_t scaleDivide); - void UploadVerticesOGL(); - void UploadTexCoordsOGL(); void UploadTransformationOGL(); - void UpdateVertices(); - void UpdateTexCoords(GLfloat s0, GLfloat t0, GLfloat s1, GLfloat t1); + void UpdateVerticesOGL(); public: OGLDisplayLayer() {}; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-07-19 06:40:56
|
Revision: 5509 http://sourceforge.net/p/desmume/code/5509 Author: rogerman Date: 2016-07-19 06:40:53 +0000 (Tue, 19 Jul 2016) Log Message: ----------- GPU: - Do some code cleanup. Modified Paths: -------------- trunk/desmume/src/GPU.cpp trunk/desmume/src/GPU.h Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-16 22:21:22 UTC (rev 5508) +++ trunk/desmume/src/GPU.cpp 2016-07-19 06:40:53 UTC (rev 5509) @@ -1783,8 +1783,11 @@ // PIXEL RENDERING /*****************************************************************************/ template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> -FORCEINLINE void GPUEngineBase::_RenderPixel(const size_t srcX, const u16 srcColor, const u8 srcAlpha, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine) +FORCEINLINE void GPUEngineBase::_RenderPixel(const size_t srcX, const u16 srcColor16, const u8 srcAlpha, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine) { + u16 &dstColor16 = *(u16 *)dstColorLine; + FragmentColor &dstColor32 = *(FragmentColor *)dstColorLine; + if (ISDEBUGRENDER) { // If we're rendering pixels to a debugging context, then assume that the pixel @@ -1792,15 +1795,15 @@ switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - *(u16 *)dstColorLine = srcColor | 0x8000; + dstColor16 = srcColor16 | 0x8000; break; case NDSColorFormat_BGR666_Rev: - (*(FragmentColor *)dstColorLine).color = ConvertColor555To6665Opaque<false>(srcColor); + dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16); break; case NDSColorFormat_BGR888_Rev: - (*(FragmentColor *)dstColorLine).color = ConvertColor555To8888Opaque<false>(srcColor); + dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16); break; } @@ -1826,15 +1829,15 @@ switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - 
*(u16 *)dstColorLine = srcColor | 0x8000; + dstColor16 = srcColor16 | 0x8000; break; case NDSColorFormat_BGR666_Rev: - (*(FragmentColor *)dstColorLine).color = ConvertColor555To6665Opaque<false>(srcColor); + dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16); break; case NDSColorFormat_BGR888_Rev: - (*(FragmentColor *)dstColorLine).color = ConvertColor555To8888Opaque<false>(srcColor); + dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16); break; } @@ -1905,27 +1908,22 @@ } // Render the pixel using the selected color effect. - u16 finalDstColor16; - FragmentColor finalDstColor32; - switch (selectedEffect) { case ColorEffect_Disable: { - finalDstColor16 = srcColor; - switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - finalDstColor16 |= 0x8000; + dstColor16 = srcColor16 | 0x8000; break; case NDSColorFormat_BGR666_Rev: - finalDstColor32.color = ConvertColor555To6665Opaque<false>(finalDstColor16); + dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16); break; case NDSColorFormat_BGR888_Rev: - finalDstColor32.color = ConvertColor555To8888Opaque<false>(finalDstColor16); + dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16); break; } break; @@ -1933,20 +1931,20 @@ case ColorEffect_IncreaseBrightness: { - finalDstColor16 = this->_ColorEffectIncreaseBrightness(srcColor & 0x7FFF); + const u16 finalDstColor16 = this->_ColorEffectIncreaseBrightness(srcColor16 & 0x7FFF); switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - finalDstColor16 |= 0x8000; + dstColor16 = finalDstColor16 | 0x8000; break; case NDSColorFormat_BGR666_Rev: - finalDstColor32.color = ConvertColor555To6665Opaque<false>(finalDstColor16); + dstColor32.color = ConvertColor555To6665Opaque<false>(finalDstColor16); break; case NDSColorFormat_BGR888_Rev: - finalDstColor32.color = ConvertColor555To8888Opaque<false>(finalDstColor16); + dstColor32.color = ConvertColor555To8888Opaque<false>(finalDstColor16); break; } break; @@ -1954,20 +1952,20 @@ case 
ColorEffect_DecreaseBrightness: { - finalDstColor16 = this->_ColorEffectDecreaseBrightness(srcColor & 0x7FFF); + const u16 finalDstColor16 = this->_ColorEffectDecreaseBrightness(srcColor16 & 0x7FFF); switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - finalDstColor16 |= 0x8000; + dstColor16 = finalDstColor16 | 0x8000; break; case NDSColorFormat_BGR666_Rev: - finalDstColor32.color = ConvertColor555To6665Opaque<false>(finalDstColor16); + dstColor32.color = ConvertColor555To6665Opaque<false>(finalDstColor16); break; case NDSColorFormat_BGR888_Rev: - finalDstColor32.color = ConvertColor555To8888Opaque<false>(finalDstColor16); + dstColor32.color = ConvertColor555To8888Opaque<false>(finalDstColor16); break; } break; @@ -1975,41 +1973,30 @@ case ColorEffect_Blend: { + FragmentColor srcColor32; + srcColor32.color = ConvertColor555To6665Opaque<false>(srcColor16); + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - finalDstColor16 = this->_ColorEffectBlend(srcColor, *(u16 *)dstColorLine, selectedBlendTable); - finalDstColor16 |= 0x8000; + dstColor16 = this->_ColorEffectBlend(srcColor16, dstColor16, selectedBlendTable); + dstColor16 |= 0x8000; break; case NDSColorFormat_BGR666_Rev: - finalDstColor32.color = ConvertColor555To6665Opaque<false>(srcColor); - finalDstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(finalDstColor32, *(FragmentColor *)dstColorLine, blendEVA, blendEVB); - finalDstColor32.a = 0x1F; + dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB); + dstColor32.a = 0x1F; break; case NDSColorFormat_BGR888_Rev: - finalDstColor32.color = ConvertColor555To8888Opaque<false>(srcColor); - finalDstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(finalDstColor32, *(FragmentColor *)dstColorLine, blendEVA, blendEVB); - finalDstColor32.a = 0xFF; + dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB); + dstColor32.a = 0xFF; break; } break; } } - switch (OUTPUTFORMAT) - { - case 
NDSColorFormat_BGR555_Rev: - *(u16 *)dstColorLine = finalDstColor16; - break; - - case NDSColorFormat_BGR666_Rev: - case NDSColorFormat_BGR888_Rev: - *(FragmentColor *)dstColorLine = finalDstColor32; - break; - } - *dstLayerIDLine = srcLayerID; } @@ -2242,14 +2229,15 @@ // We can't unify this yet because the output framebuffer is in RGBA5551, but the 3D source pixels are in RGBA6665. // However, GPUEngineBase::_RenderPixel() takes source pixels in RGB555. In order to unify the methods, all pixels // must be processed in RGBA6665. -FORCEINLINE void GPUEngineBase::_RenderPixel3D(const FragmentColor src, u16 &dstColor, u8 &dstLayerID, bool enableColorEffect) +template<NDSColorFormat OUTPUTFORMAT> +FORCEINLINE void GPUEngineBase::_RenderPixel3D(const bool enableColorEffect, const FragmentColor srcColor32, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine) { ColorEffect selectedEffect = ColorEffect_Disable; if (enableColorEffect) { const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; - const bool dstEffectEnable = (dstLayerID != GPULayerID_BG0) && this->_dstBlendEnable[dstLayerID]; + const bool dstEffectEnable = (*dstLayerIDLine != GPULayerID_BG0) && this->_dstBlendEnable[*dstLayerIDLine]; // Select the color effect based on the BLDCNT target flags. bool forceBlendEffect = false; @@ -2288,98 +2276,59 @@ } // Render the pixel using the selected color effect. 
- u16 convertedSrc = R6G6B6TORGB15(src.r, src.g, src.b); - - switch (selectedEffect) + if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { - case ColorEffect_Disable: - break; - - case ColorEffect_IncreaseBrightness: - convertedSrc = this->_ColorEffectIncreaseBrightness(convertedSrc); - break; - - case ColorEffect_DecreaseBrightness: - convertedSrc = this->_ColorEffectDecreaseBrightness(convertedSrc); - break; - - case ColorEffect_Blend: - convertedSrc = this->_ColorEffectBlend3D(src, dstColor); - break; + const u16 srcColor16 = ConvertColor6665To5551<false>(srcColor32); + u16 &dstColor16 = *(u16 *)dstColorLine; + + switch (selectedEffect) + { + case ColorEffect_Disable: + dstColor16 = srcColor16; + break; + + case ColorEffect_IncreaseBrightness: + dstColor16 = this->_ColorEffectIncreaseBrightness(srcColor16); + break; + + case ColorEffect_DecreaseBrightness: + dstColor16 = this->_ColorEffectDecreaseBrightness(srcColor16); + break; + + case ColorEffect_Blend: + dstColor16 = this->_ColorEffectBlend3D(srcColor32, dstColor16); + break; + } + + dstColor16 |= 0x8000; } - - dstColor = convertedSrc | 0x8000; - dstLayerID = GPULayerID_BG0; -} - -template<NDSColorFormat OUTPUTFORMAT> -FORCEINLINE void GPUEngineBase::_RenderPixel3D(const FragmentColor src, FragmentColor &dstColor, u8 &dstLayerID, bool enableColorEffect) -{ - ColorEffect selectedEffect = ColorEffect_Disable; - - if (enableColorEffect) + else { - const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; - const bool dstEffectEnable = (dstLayerID != GPULayerID_BG0) && this->_dstBlendEnable[dstLayerID]; + FragmentColor &dstColor32 = *(FragmentColor *)dstColorLine; - // Select the color effect based on the BLDCNT target flags. - bool forceBlendEffect = false; - - // 3D rendering has a special override: If the destination pixel is set to blend, then always blend. - // Test case: When starting a stage in Super Princess Peach, the screen will be solid black unless - // blending is forced here. 
- forceBlendEffect = dstEffectEnable; - - if (forceBlendEffect) + switch (selectedEffect) { - selectedEffect = ColorEffect_Blend; + case ColorEffect_Disable: + dstColor32 = srcColor32; + break; + + case ColorEffect_IncreaseBrightness: + dstColor32 = this->_ColorEffectIncreaseBrightness<OUTPUTFORMAT>(srcColor32, this->_BLDALPHA_EVY); + break; + + case ColorEffect_DecreaseBrightness: + dstColor32 = this->_ColorEffectDecreaseBrightness(srcColor32, this->_BLDALPHA_EVY); + break; + + case ColorEffect_Blend: + dstColor32 = this->_ColorEffectBlend3D<OUTPUTFORMAT>(srcColor32, dstColor32); + break; } - else if (this->_srcBlendEnable[GPULayerID_BG0]) - { - switch ((ColorEffect)BLDCNT.ColorEffect) - { - // For the Blend effect, both first and second target flags must be checked. - case ColorEffect_Blend: - { - if (dstEffectEnable) selectedEffect = (ColorEffect)BLDCNT.ColorEffect; - break; - } - - // For the Increase/Decrease Brightness effects, only the first target flag needs to be checked. - // Test case: Bomberman Land Touch! dialog boxes will render too dark without this check. - case ColorEffect_IncreaseBrightness: - case ColorEffect_DecreaseBrightness: - selectedEffect = (ColorEffect)BLDCNT.ColorEffect; - break; - - default: - break; - } - } + + dstColor32.a = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? 0xFF : 0x1F; } - // Render the pixel using the selected color effect. - switch (selectedEffect) - { - case ColorEffect_Disable: - dstColor = src; - break; - - case ColorEffect_IncreaseBrightness: - dstColor = this->_ColorEffectIncreaseBrightness<OUTPUTFORMAT>(src, this->_BLDALPHA_EVY); - break; - - case ColorEffect_DecreaseBrightness: - dstColor = this->_ColorEffectDecreaseBrightness(src, this->_BLDALPHA_EVY); - break; - - case ColorEffect_Blend: - dstColor = this->_ColorEffectBlend3D<OUTPUTFORMAT>(src, dstColor); - break; - } - - dstColor.a = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? 
0xFF : 0x1F; - dstLayerID = GPULayerID_BG0; + *dstLayerIDLine = GPULayerID_BG0; } #ifdef ENABLE_SSE2 @@ -2633,7 +2582,7 @@ #endif u8 index; - u16 color; + u16 srcColor; // as an optimization, specially handle the fairly common case of // "unrotated + unscaled + no boundary checking required" @@ -2646,16 +2595,16 @@ { for (size_t i = 0; i < lineWidth; i++) { - GetPixelFunc(auxX, auxY, wh, map, tile, pal, index, color); + GetPixelFunc(auxX, auxY, wh, map, tile, pal, index, srcColor); if (ISCUSTOMRENDERINGNEEDED) { this->_bgLayerIndex[i] = index; - this->_bgLayerColor[i] = color; + this->_bgLayerColor[i] = srcColor; } else { - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, i, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, i, srcColor, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); } auxX++; @@ -2677,16 +2626,16 @@ if (WRAP || ((auxX >= 0) && (auxX < wh) && (auxY >= 0) && (auxY < ht))) { - GetPixelFunc(auxX, auxY, wh, map, tile, pal, index, color); + GetPixelFunc(auxX, auxY, wh, map, tile, pal, index, srcColor); if (ISCUSTOMRENDERINGNEEDED) { this->_bgLayerIndex[i] = index; - this->_bgLayerColor[i] = color; + this->_bgLayerColor[i] = srcColor; } else { - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, i, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, i, srcColor, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); } } } @@ -2724,7 +2673,7 @@ } template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> 
-FORCEINLINE void GPUEngineBase::_RenderPixelSingle(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex, u16 color, const size_t srcX, const bool opaque) +FORCEINLINE void GPUEngineBase::_RenderPixelSingle(const size_t lineIndex, const size_t srcX, u16 srcColor16, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine, const bool opaque) { bool willRenderColor = opaque; @@ -2733,32 +2682,32 @@ //due to this early out, we will get incorrect behavior in cases where //we enable mosaic in the middle of a frame. this is deemed unlikely. - if (!opaque) color = 0xFFFF; - else color &= 0x7FFF; + if (!opaque) srcColor16 = 0xFFFF; + else srcColor16 &= 0x7FFF; if (!this->_mosaicWidthBG[srcX].begin || !this->_mosaicHeightBG[lineIndex].begin) { - color = this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[srcX].trunc]; + srcColor16 = this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[srcX].trunc]; } - this->_mosaicColors.bg[srcLayerID][srcX] = color; + this->_mosaicColors.bg[srcLayerID][srcX] = srcColor16; - willRenderColor = (color != 0xFFFF); + willRenderColor = (srcColor16 != 0xFFFF); } if (willRenderColor) { this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcX, - color, + srcColor16, 0, srcLayerID, (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLine + srcX) : (void *)((FragmentColor *)dstColorLine + srcX), - dstLayerID + srcX); + dstLayerIDLine + srcX); } } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> -void GPUEngineBase::_RenderPixelsCustom(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex) +void GPUEngineBase::_RenderPixelsCustom(const size_t lineIndex, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine) { const size_t lineWidth = GPU->GetDisplayInfo().customWidth; @@ -2873,7 +2822,7 @@ size_t i = 0; #ifdef ENABLE_SSE2 - for (; i < ssePixCount; i+=16, dstLayerID+=16, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 16) : (void *)((FragmentColor *)dstColorLine + 16)) + for (; i < ssePixCount; i+=16, dstLayerIDLine+=16, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 16) : (void *)((FragmentColor *)dstColorLine + 16)) { __m128i src[4]; @@ -2903,7 +2852,7 @@ const __m128i srcAlpha = _mm_setzero_si128(); - __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)dstLayerID); + __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)dstLayerIDLine); __m128i passMask8 = _mm_xor_si128( _mm_cmpeq_epi8(_mm_load_si128((__m128i *)(this->_bgLayerIndexCustom + i)), _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF) ); __m128i dst[4]; @@ -2939,14 +2888,14 @@ _mm_store_si128((__m128i *)dstColorLine + 3, dst[3]); } - _mm_store_si128((__m128i *)dstLayerID, dstLayerID_vec128); + _mm_store_si128((__m128i *)dstLayerIDLine, dstLayerID_vec128); } #endif #ifdef ENABLE_SSE2 #pragma LOOPVECTORIZE_DISABLE #endif - for (; i < lineWidth; i++, dstLayerID++, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLine + 1) : (void *)((FragmentColor *)dstColorLine + 1)) + for (; i < lineWidth; i++, dstLayerIDLine++, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 1) : (void *)((FragmentColor *)dstColorLine + 1)) { if (this->_bgLayerIndexCustom[i] == 0) { @@ -2958,13 +2907,13 @@ 0, srcLayerID, dstColorLine, - dstLayerID); + dstLayerIDLine); } } } template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> -void GPUEngineBase::_RenderPixelsCustomVRAM(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex) +void GPUEngineBase::_RenderPixelsCustomVRAM(const size_t lineIndex, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine) { const size_t lineWidth = GPU->GetDisplayInfo().customWidth; const size_t lineCount = _gpuDstLineCount[lineIndex]; @@ -2978,7 +2927,7 @@ const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[srcLayerID]; const size_t ssePixCount = (dstPixCount - (dstPixCount % 16)); - for (; i < ssePixCount; i+=16, dstLayerID+=16, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 16) : (void *)((FragmentColor *)dstColorLine + 16)) + for (; i < ssePixCount; i+=16, dstLayerIDLine+=16, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLine + 16) : (void *)((FragmentColor *)dstColorLine + 16)) { const __m128i src16[2] = { _mm_load_si128((__m128i *)(srcLine + i + 0)), _mm_load_si128((__m128i *)(srcLine + i + 8)) }; @@ -3007,7 +2956,7 @@ const __m128i srcAlpha = _mm_setzero_si128(); - __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)(dstLayerID + i)); + __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)(dstLayerIDLine + i)); __m128i passMask8 = _mm_packs_epi16( _mm_srli_epi16(src16[0], 15), _mm_srli_epi16(src16[1], 15) ); passMask8 = _mm_cmpeq_epi8(passMask8, _mm_set1_epi8(1)); @@ -3044,14 +2993,14 @@ _mm_store_si128((__m128i *)dstColorLine + 3, dst[3]); } - _mm_store_si128((__m128i *)dstLayerID, dstLayerID_vec128); + _mm_store_si128((__m128i *)dstLayerIDLine, dstLayerID_vec128); } #endif #ifdef ENABLE_SSE2 #pragma LOOPVECTORIZE_DISABLE #endif - for (; i < dstPixCount; i++, dstLayerID++, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 1) : (void *)((FragmentColor *)dstColorLine + 1)) + for (; i < dstPixCount; i++, dstLayerIDLine++, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLine + 1) : (void *)((FragmentColor *)dstColorLine + 1)) { if ((srcLine[i] & 0x8000) == 0) { @@ -3063,7 +3012,7 @@ 0, srcLayerID, dstColorLine, - dstLayerID); + dstLayerIDLine); } } @@ -3119,7 +3068,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); } x++; @@ -3138,7 +3087,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); } x++; @@ -3155,7 +3104,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); } x++; @@ -3178,7 +3127,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, 
dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); } x++; @@ -3197,7 +3146,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); } x++; @@ -3214,7 +3163,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); } x++; @@ -3259,7 +3208,7 @@ { const u8 index = *tileColorIdx; const u16 color = LE_TO_LOCAL_16(tilePal[index]); - this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (index != 0)); } } } @@ -4585,11 +4534,11 @@ { if (useCustomVRAM) { - this->_RenderPixelsCustomVRAM<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, 
COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDCustom, lineIndex); + this->_RenderPixelsCustomVRAM<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, srcLayerID, dstColorLine, this->_renderLineLayerIDCustom); } else { - this->_RenderPixelsCustom<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDCustom, lineIndex); + this->_RenderPixelsCustom<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, srcLayerID, dstColorLine, this->_renderLineLayerIDCustom); } } @@ -5467,27 +5416,10 @@ continue; } - switch (OUTPUTFORMAT) - { - case NDSColorFormat_BGR555_Rev: - { - this->_RenderPixel3D(*srcLinePtr, - *(u16 *)dstColorLinePtr, - *dstLayerIDPtr, - enableColorEffect); - break; - } - - case NDSColorFormat_BGR666_Rev: - case NDSColorFormat_BGR888_Rev: - { - this->_RenderPixel3D<OUTPUTFORMAT>(*srcLinePtr, - *(FragmentColor *)dstColorLinePtr, - *dstLayerIDPtr, - enableColorEffect); - break; - } - } + this->_RenderPixel3D<OUTPUTFORMAT>(enableColorEffect, + *srcLinePtr, + dstColorLinePtr, + dstLayerIDPtr); } } } @@ -5517,27 +5449,10 @@ continue; } - switch (OUTPUTFORMAT) - { - case NDSColorFormat_BGR555_Rev: - { - this->_RenderPixel3D(srcLinePtr[srcX], - *(u16 *)dstColorLinePtr, - *dstLayerIDPtr, - enableColorEffect); - break; - } - - case NDSColorFormat_BGR666_Rev: - case NDSColorFormat_BGR888_Rev: - { - this->_RenderPixel3D<OUTPUTFORMAT>(srcLinePtr[srcX], - *(FragmentColor *)dstColorLinePtr, - *dstLayerIDPtr, - enableColorEffect); - break; - } - } + this->_RenderPixel3D<OUTPUTFORMAT>(enableColorEffect, + srcLinePtr[srcX], + dstColorLinePtr, + dstLayerIDPtr); } srcLinePtr += customLineWidth; @@ -6655,7 +6570,7 @@ { const u8 index = map[XBG]; const u16 color = LE_TO_LOCAL_16(this->_paletteBG[index]); - this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, 
COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (color != 0)); + this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(lineIndex, x, color, srcLayerID, dstColorLine, this->_renderLineLayerIDNative, (color != 0)); } } } Modified: trunk/desmume/src/GPU.h =================================================================== --- trunk/desmume/src/GPU.h 2016-07-16 22:21:22 UTC (rev 5508) +++ trunk/desmume/src/GPU.h 2016-07-19 06:40:53 UTC (rev 5509) @@ -1327,9 +1327,9 @@ template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> void _RenderPixelIterate(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal); TILEENTRY _GetTileEntry(const u32 tileMapAddress, const u16 xOffset, const u16 layerWidthMask); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void _RenderPixelSingle(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex, u16 color, const size_t srcX, const bool opaque); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> void _RenderPixelsCustom(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex); - template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> void _RenderPixelsCustomVRAM(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool 
NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void _RenderPixelSingle(const size_t lineIndex, const size_t srcX, u16 srcColor16, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine, const bool opaque); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> void _RenderPixelsCustom(const size_t lineIndex, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> void _RenderPixelsCustomVRAM(const size_t lineIndex, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine); template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_BGText(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const u16 XBG, const u16 YBG); template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_BGAffine(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param); @@ -1359,9 +1359,8 @@ template<NDSColorFormat OUTPUTFORMAT> void* _RenderLine_LayerOBJ(itemsForPriority_t *__restrict item, void *__restrict dstColorLine, const u16 lineIndex); - template<NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void _RenderPixel(const size_t srcX, const u16 srcColor, const u8 srcAlpha, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine); - FORCEINLINE void _RenderPixel3D(const FragmentColor src, u16 &dstColor, u8 &dstLayerID, bool enableColorEffect); 
- template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _RenderPixel3D(const FragmentColor src, FragmentColor &dstColor, u8 &dstLayerID, bool enableColorEffect); + template<NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void _RenderPixel(const size_t srcX, const u16 srcColor16, const u8 srcAlpha, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine); + template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _RenderPixel3D(const bool enableColorEffect, const FragmentColor srcColor32, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine); FORCEINLINE u16 _ColorEffectBlend(const u16 colA, const u16 colB, const u16 blendEVA, const u16 blendEVB); FORCEINLINE u16 _ColorEffectBlend(const u16 colA, const u16 colB, const TBlendTable *blendTable); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-07-16 22:21:25
|
Revision: 5508 http://sourceforge.net/p/desmume/code/5508 Author: zeromus Date: 2016-07-16 22:21:22 +0000 (Sat, 16 Jul 2016) Log Message: ----------- add texture smoothing option to windows port Modified Paths: -------------- trunk/desmume/src/windows/main.cpp trunk/desmume/src/windows/resource.h trunk/desmume/src/windows/resources.rc Modified: trunk/desmume/src/windows/main.cpp =================================================================== --- trunk/desmume/src/windows/main.cpp 2016-07-15 20:30:05 UTC (rev 5507) +++ trunk/desmume/src/windows/main.cpp 2016-07-16 22:21:22 UTC (rev 5508) @@ -2971,6 +2971,7 @@ CommonSettings.GFX3D_Renderer_TextureScalingFactor = GetPrivateProfileInt("3D", "TextureScalingFactor ", 1, IniName); CommonSettings.GFX3D_Renderer_TextureDeposterize = GetPrivateProfileBool("3D", "TextureDeposterize ", 1, IniName); + CommonSettings.GFX3D_Renderer_TextureSmoothing = GetPrivateProfileBool("3D", "TextureSmooth ", 1, IniName); lostFocusPause = GetPrivateProfileBool("Focus", "BackgroundPause", false, IniName); @@ -6310,6 +6311,7 @@ if(CommonSettings.GFX3D_Renderer_TextureScalingFactor == 2) CheckDlgButton(hw,IDC_TEXSCALE_2,CommonSettings.GFX3D_Texture); if(CommonSettings.GFX3D_Renderer_TextureScalingFactor == 4) CheckDlgButton(hw,IDC_TEXSCALE_4,CommonSettings.GFX3D_Texture); CheckDlgButton(hw,IDC_TEX_DEPOSTERIZE, CommonSettings.GFX3D_Renderer_TextureDeposterize); + CheckDlgButton(hw,IDC_TEX_SMOOTH, CommonSettings.GFX3D_Renderer_TextureSmoothing); SendDlgItemMessage(hw, IDC_NUD_PRESCALEHD, UDM_SETRANGE, 0, MAKELPARAM(5, 1)); SendDlgItemMessage(hw, IDC_NUD_PRESCALEHD, UDM_SETPOS, 0, CommonSettings.GFX3D_PrescaleHD); @@ -6341,6 +6343,7 @@ if(IsDlgCheckboxChecked(hw,IDC_TEXSCALE_2)) CommonSettings.GFX3D_Renderer_TextureScalingFactor = 2; if(IsDlgCheckboxChecked(hw,IDC_TEXSCALE_4)) CommonSettings.GFX3D_Renderer_TextureScalingFactor = 4; CommonSettings.GFX3D_Renderer_TextureDeposterize = IsDlgCheckboxChecked(hw,IDC_TEX_DEPOSTERIZE); + 
CommonSettings.GFX3D_Renderer_TextureSmoothing = IsDlgCheckboxChecked(hw,IDC_TEX_SMOOTH); { Lock lock(win_backbuffer_sync); @@ -6364,6 +6367,7 @@ WritePrivateProfileInt ("3D", "PrescaleHD", CommonSettings.GFX3D_PrescaleHD, IniName); WritePrivateProfileInt ("3D", "TextureScalingFactor", CommonSettings.GFX3D_Renderer_TextureScalingFactor, IniName); WritePrivateProfileBool("3D", "TextureDeposterize", CommonSettings.GFX3D_Renderer_TextureDeposterize, IniName); + WritePrivateProfileBool("3D", "TextureSmooth", CommonSettings.GFX3D_Renderer_TextureSmoothing, IniName); } case IDCANCEL: { Modified: trunk/desmume/src/windows/resource.h =================================================================== --- trunk/desmume/src/windows/resource.h 2016-07-15 20:30:05 UTC (rev 5507) +++ trunk/desmume/src/windows/resource.h 2016-07-16 22:21:22 UTC (rev 5508) @@ -464,6 +464,8 @@ #define IDC_STATIC_S4 1064 #define IDC_TEX_DEPOSTERIZE 1064 #define IDC_EVALUE 1065 +#define IDC_TEX_DEPOSTERIZE2 1065 +#define IDC_TEX_SMOOTH 1065 #define IDC_STATIC_RANGE 1066 #define IDC_TEXSCALE_1 1066 #define IDC_TEXSCALE_2 1067 Modified: trunk/desmume/src/windows/resources.rc =================================================================== --- trunk/desmume/src/windows/resources.rc 2016-07-15 20:30:05 UTC (rev 5507) +++ trunk/desmume/src/windows/resources.rc 2016-07-16 22:21:22 UTC (rev 5508) @@ -143,6 +143,7 @@ CONTROL "GL TexScale 2x",IDC_TEXSCALE_2,"Button",BS_AUTORADIOBUTTON,193,90,67,10 CONTROL "GL TexScale 4x",IDC_TEXSCALE_4,"Button",BS_AUTORADIOBUTTON,193,100,67,10 CONTROL "GL Tex Deposterize",IDC_TEX_DEPOSTERIZE,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,261,81,79,10 + CONTROL "GL Tex Smoothing",IDC_TEX_SMOOTH,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,261,91,75,10 END IDD_ABOUT_BOX DIALOGEX 0, 0, 268, 198 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-07-15 20:30:08
|
Revision: 5507 http://sourceforge.net/p/desmume/code/5507 Author: rogerman Date: 2016-07-15 20:30:05 +0000 (Fri, 15 Jul 2016) Log Message: ----------- GPU: - Move towards completing support for changing the output framebuffer color format to RGB666 or RGB888. Significantly increases the generated code size, but this is necessary for performance. (Related to r5433. This rework is still incomplete.) Revision Links: -------------- http://sourceforge.net/p/desmume/code/5433 Modified Paths: -------------- trunk/desmume/src/GPU.cpp trunk/desmume/src/GPU.h trunk/desmume/src/NDSSystem.cpp Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-15 16:29:08 UTC (rev 5506) +++ trunk/desmume/src/GPU.cpp 2016-07-15 20:30:05 UTC (rev 5507) @@ -1277,7 +1277,7 @@ #endif } -template<GPULayerID LAYERID> +template <GPULayerID LAYERID> void GPUEngineBase::ParseReg_BGnX() { if (LAYERID == GPULayerID_BG2) @@ -1290,7 +1290,7 @@ } } -template<GPULayerID LAYERID> +template <GPULayerID LAYERID> void GPUEngineBase::ParseReg_BGnY() { if (LAYERID == GPULayerID_BG2) @@ -1303,6 +1303,7 @@ } } +template <NDSColorFormat OUTPUTFORMAT> void GPUEngineBase::_RenderLine_Clear(const u16 clearColor, const u16 l, void *renderLineTarget) { // Clear the current line with the clear color @@ -1310,7 +1311,7 @@ u16 dstClearColor16 = clearColor; - if (BLDCNT.Backdrop_Target1 != 0) + if (this->_srcBlendEnable[GPULayerID_Backdrop]) { if (BLDCNT.ColorEffect == ColorEffect_IncreaseBrightness) { @@ -1322,7 +1323,7 @@ } } - switch (GPU->GetDisplayInfo().colorFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(renderLineTarget, dstClearColor16); @@ -1348,6 +1349,7 @@ this->_itemsForPriority[3].nbPixelsX = 0; } +template <NDSColorFormat OUTPUTFORMAT> void GPUEngineBase::RenderLine(const u16 l) { // By default, do nothing. 
@@ -2606,7 +2608,7 @@ } } -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> void GPUEngineBase::_RenderPixelIterate_Final(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { const u16 lineWidth = (ISDEBUGRENDER) ? this->_BGLayer[srcLayerID].size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; @@ -2653,7 +2655,7 @@ } else { - this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, i, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, i, (index != 0)); } auxX++; @@ -2684,28 +2686,28 @@ } else { - this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, i, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, i, (index != 0)); } } } } -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> void
GPUEngineBase::_RenderPixelIterate_ApplyWrap(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { - this->_RenderPixelIterate_Final<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, WRAP>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); + this->_RenderPixelIterate_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, WRAP>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); } -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> void GPUEngineBase::_RenderPixelIterate(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { if (this->_BGLayer[srcLayerID].isDisplayWrapped) { - this->_RenderPixelIterate_ApplyWrap<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, true>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); + this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, true>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); } else { - this->_RenderPixelIterate_ApplyWrap<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, false>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); + this->_RenderPixelIterate_ApplyWrap<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC,
NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, false>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); } } @@ -2721,7 +2723,7 @@ return theTileEntry; } -template <bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void GPUEngineBase::_RenderPixelSingle(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex, u16 color, const size_t srcX, const bool opaque) { bool willRenderColor = opaque; @@ -2746,17 +2748,16 @@ if (willRenderColor) { - // TODO: This should flag a warning. Fix this when we get proper color format support. - this->_RenderPixel<NDSColorFormat_BGR555_Rev, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcX, - color, - 0, - srcLayerID, - (NDSColorFormat_BGR555_Rev == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + srcX) : (void *)((FragmentColor *)dstColorLine + srcX), - dstLayerID + srcX); + this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcX, + color, + 0, + srcLayerID, + (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLine + srcX) : (void *)((FragmentColor *)dstColorLine + srcX), + dstLayerID + srcX); } } -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> void GPUEngineBase::_RenderPixelsCustom(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex) { const size_t lineWidth = GPU->GetDisplayInfo().customWidth; @@ -2859,7 +2860,6 @@ } #endif - const NDSColorFormat outputFormat = GPU->GetDisplayInfo().colorFormat; const size_t lineCount = _gpuDstLineCount[lineIndex]; #ifdef ENABLE_SSE2 @@ -2873,11 +2873,11 @@ size_t i = 0; #ifdef ENABLE_SSE2 - for (; i < ssePixCount; i+=16, dstLayerID+=16, dstColorLine = (outputFormat == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 16) : (void *)((FragmentColor *)dstColorLine + 16)) + for (; i < ssePixCount; i+=16, dstLayerID+=16, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLine + 16) : (void *)((FragmentColor *)dstColorLine + 16)) { __m128i src[4]; - if (outputFormat == NDSColorFormat_BGR555_Rev) + if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { src[0] = _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + i + 0)); src[1] = _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + i + 8)); @@ -2889,7 +2889,7 @@ const __m128i src16[2] = { _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + i + 0)), _mm_load_si128((__m128i *)(this->_bgLayerColorCustom + i + 8)) }; - if (outputFormat == NDSColorFormat_BGR666_Rev) + if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]); ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]); @@ -2910,7 +2910,7 @@ dst[0] = _mm_load_si128((__m128i *)dstColorLine + 0); dst[1] = _mm_load_si128((__m128i *)dstColorLine + 1); - if (outputFormat == NDSColorFormat_BGR555_Rev) + if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { dst[2] = _mm_setzero_si128(); dst[3] = _mm_setzero_si128(); @@ -2921,19 +2921,19 @@ dst[3] = _mm_load_si128((__m128i *)dstColorLine + 3); } - this->_RenderPixel16_SSE2<NDSColorFormat_BGR555_Rev, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(i, - (ColorEffect)BLDCNT.ColorEffect, - srcLayerID, - src[3], src[2], src[1], src[0], - srcAlpha, - srcEffectEnableMask, - dst[3], dst[2], dst[1], dst[0], - dstLayerID_vec128, - passMask8); + this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(i, + (ColorEffect)BLDCNT.ColorEffect, + srcLayerID, + src[3], src[2], src[1], src[0], + srcAlpha, + srcEffectEnableMask, + dst[3], dst[2], dst[1], dst[0], + dstLayerID_vec128, + passMask8); _mm_store_si128((__m128i *)dstColorLine + 0, dst[0]); _mm_store_si128((__m128i *)dstColorLine + 1, dst[1]); - if (outputFormat != NDSColorFormat_BGR555_Rev) + if (OUTPUTFORMAT != NDSColorFormat_BGR555_Rev) { _mm_store_si128((__m128i *)dstColorLine + 2, 
dst[2]); _mm_store_si128((__m128i *)dstColorLine + 3, dst[3]); @@ -2946,27 +2946,26 @@ #ifdef ENABLE_SSE2 #pragma LOOPVECTORIZE_DISABLE #endif - for (; i < lineWidth; i++, dstLayerID++, dstColorLine = (outputFormat == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 1) : (void *)((FragmentColor *)dstColorLine + 1)) + for (; i < lineWidth; i++, dstLayerID++, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 1) : (void *)((FragmentColor *)dstColorLine + 1)) { if (this->_bgLayerIndexCustom[i] == 0) { continue; } - this->_RenderPixel<NDSColorFormat_BGR555_Rev, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(_gpuDstToSrcIndex[i], - this->_bgLayerColorCustom[i], - 0, - srcLayerID, - dstColorLine, - dstLayerID); + this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(_gpuDstToSrcIndex[i], + this->_bgLayerColorCustom[i], + 0, + srcLayerID, + dstColorLine, + dstLayerID); } } } -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> void GPUEngineBase::_RenderPixelsCustomVRAM(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex) { - const NDSColorFormat outputFormat = GPU->GetDisplayInfo().colorFormat; const size_t lineWidth = GPU->GetDisplayInfo().customWidth; const size_t lineCount = _gpuDstLineCount[lineIndex]; const size_t dstPixCount = lineWidth * lineCount; @@ -2979,13 +2978,13 @@ const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[srcLayerID]; const size_t ssePixCount = (dstPixCount - (dstPixCount % 16)); - for (; i < ssePixCount; i+=16, dstLayerID+=16, dstColorLine = (outputFormat == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLine + 16) : (void *)((FragmentColor *)dstColorLine + 16)) + for (; i < ssePixCount; i+=16, dstLayerID+=16, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 16) : (void *)((FragmentColor *)dstColorLine + 16)) { const __m128i src16[2] = { _mm_load_si128((__m128i *)(srcLine + i + 0)), _mm_load_si128((__m128i *)(srcLine + i + 8)) }; __m128i src[4]; - if (outputFormat == NDSColorFormat_BGR555_Rev) + if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { src[0] = src16[0]; src[1] = src16[1]; @@ -2994,7 +2993,7 @@ } else { - if (outputFormat == NDSColorFormat_BGR666_Rev) + if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]); ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]); @@ -3016,7 +3015,7 @@ dst[0] = _mm_load_si128((__m128i *)dstColorLine + 0); dst[1] = _mm_load_si128((__m128i *)dstColorLine + 1); - if (outputFormat == NDSColorFormat_BGR555_Rev) + if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { dst[2] = _mm_setzero_si128(); dst[3] = _mm_setzero_si128(); @@ -3027,19 +3026,19 @@ dst[3] = _mm_load_si128((__m128i *)dstColorLine + 3); } - this->_RenderPixel16_SSE2<NDSColorFormat_BGR555_Rev, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(i, - (ColorEffect)BLDCNT.ColorEffect, - srcLayerID, - src[3], src[2], src[1], src[0], - srcAlpha, - srcEffectEnableMask, - dst[3], dst[2], dst[1], dst[0], - dstLayerID_vec128, - passMask8); + this->_RenderPixel16_SSE2<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(i, + (ColorEffect)BLDCNT.ColorEffect, + srcLayerID, + src[3], src[2], src[1], src[0], + srcAlpha, + srcEffectEnableMask, + dst[3], dst[2], dst[1], dst[0], + dstLayerID_vec128, + passMask8); _mm_store_si128((__m128i *)dstColorLine + 0, dst[0]); _mm_store_si128((__m128i *)dstColorLine + 1, dst[1]); - if (outputFormat != NDSColorFormat_BGR555_Rev) + if (OUTPUTFORMAT != 
NDSColorFormat_BGR555_Rev) { _mm_store_si128((__m128i *)dstColorLine + 2, dst[2]); _mm_store_si128((__m128i *)dstColorLine + 3, dst[3]); @@ -3052,19 +3051,19 @@ #ifdef ENABLE_SSE2 #pragma LOOPVECTORIZE_DISABLE #endif - for (; i < dstPixCount; i++, dstLayerID++, dstColorLine = (outputFormat == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 1) : (void *)((FragmentColor *)dstColorLine + 1)) + for (; i < dstPixCount; i++, dstLayerID++, dstColorLine = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 1) : (void *)((FragmentColor *)dstColorLine + 1)) { if ((srcLine[i] & 0x8000) == 0) { continue; } - this->_RenderPixel<NDSColorFormat_BGR555_Rev, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(_gpuDstToSrcIndex[i], - srcLine[i], - 0, - srcLayerID, - dstColorLine, - dstLayerID); + this->_RenderPixel<OUTPUTFORMAT, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(_gpuDstToSrcIndex[i], + srcLine[i], + 0, + srcLayerID, + dstColorLine, + dstLayerID); } } @@ -3072,7 +3071,7 @@ // BACKGROUND RENDERING -TEXT- /*****************************************************************************/ // render a text background to the combined pixelbuffer -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_RenderLine_BGText(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const u16 XBG, const u16 YBG) { const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; @@ -3120,7 +3119,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, 
color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); } x++; @@ -3139,7 +3138,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); } x++; @@ -3156,7 +3155,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); } x++; @@ -3179,7 +3178,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); } x++; @@ -3198,7 +3197,7 @@ { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, 
this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); } x++; @@ -3215,7 +3214,7 @@ { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); } x++; @@ -3260,20 +3259,20 @@ { const u8 index = *tileColorIdx; const u16 color = LE_TO_LOCAL_16(tilePal[index]); - this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); + this->_RenderPixelSingle<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, x, (index != 0)); } } } } } -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_RenderLine_BGAffine(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param) { - this->_RenderPixelIterate<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_8bit_entry>(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].tileMapAddress,
this->_BGLayer[srcLayerID].tileEntryAddress, this->_paletteBG); + this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_8bit_entry>(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].tileMapAddress, this->_BGLayer[srcLayerID].tileEntryAddress, this->_paletteBG); } -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void* GPUEngineBase::_RenderLine_BGExtended(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, bool &outUseCustomVRAM) { const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; @@ -3284,17 +3283,17 @@ { if (DISPCNT.ExBGxPalette_Enable) { - this->_RenderPixelIterate< ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry<true> >(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].tileMapAddress, this->_BGLayer[srcLayerID].tileEntryAddress, *(this->_BGLayer[srcLayerID].extPalette)); + this->_RenderPixelIterate< OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry<true> >(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].tileMapAddress, this->_BGLayer[srcLayerID].tileEntryAddress, *(this->_BGLayer[srcLayerID].extPalette)); } else { - this->_RenderPixelIterate< ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry<false> >(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].tileMapAddress, this->_BGLayer[srcLayerID].tileEntryAddress, this->_paletteBG); + this->_RenderPixelIterate< OUTPUTFORMAT,
ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry<false> >(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].tileMapAddress, this->_BGLayer[srcLayerID].tileEntryAddress, this->_paletteBG); } break; } case BGType_AffineExt_256x1: // 256 colors - this->_RenderPixelIterate<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_256_map>(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].BMPAddress, 0, this->_paletteBG); + this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_256_map>(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].BMPAddress, 0, this->_paletteBG); break; case BGType_AffineExt_Direct: // direct colors / BMP @@ -3317,7 +3316,7 @@ { void *newRenderLineTarget; - switch (GPU->GetDisplayInfo().colorFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: newRenderLineTarget = (this->_displayOutputMode == GPUDisplayMode_Normal) ? 
(u8 *)this->nativeBuffer + (lineIndex * GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)) : (u8 *)this->_internalRenderLineTargetNative; @@ -3345,7 +3344,7 @@ if (!outUseCustomVRAM) { - this->_RenderPixelIterate<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_BMP_map>(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].BMPAddress, 0, this->_paletteBG); + this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_BMP_map>(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].BMPAddress, 0, this->_paletteBG); } else { @@ -3357,7 +3356,7 @@ void *newRenderLineTarget; - switch (GPU->GetDisplayInfo().colorFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: newRenderLineTarget = (this->_displayOutputMode == GPUDisplayMode_Normal) ? (u8 *)this->customBuffer + (customLineIndex * customLineWidth * sizeof(u16)) : (u8 *)this->_internalRenderLineTargetCustom; @@ -3382,7 +3381,7 @@ } case BGType_Large8bpp: // large screen 256 colors - this->_RenderPixelIterate<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_256_map>(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].largeBMPAddress, 0, this->_paletteBG); + this->_RenderPixelIterate<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_256_map>(srcLayerID, dstColorLine, lineIndex, param, this->_BGLayer[srcLayerID].largeBMPAddress, 0, this->_paletteBG); break; default: @@ -3396,49 +3395,49 @@ // BACKGROUND RENDERING -HELPER FUNCTIONS- /*****************************************************************************/ -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool 
NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_LineText(const GPULayerID srcLayerID, void *__restrict dstColorLine, const u16 lineIndex) { if (ISDEBUGRENDER) { - this->_RenderLine_BGText<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 *)dstColorLine, lineIndex, 0, lineIndex); + this->_RenderLine_BGText<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 *)dstColorLine, lineIndex, 0, lineIndex); } else { - this->_RenderLine_BGText<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 *)dstColorLine, lineIndex, this->_BGLayer[srcLayerID].xOffset, lineIndex + this->_BGLayer[srcLayerID].yOffset); + this->_RenderLine_BGText<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 *)dstColorLine, lineIndex, this->_BGLayer[srcLayerID].xOffset, lineIndex + this->_BGLayer[srcLayerID].yOffset); } } -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void GPUEngineBase::_LineRot(const GPULayerID srcLayerID, void *__restrict dstColorLine, const u16 lineIndex) { if (ISDEBUGRENDER) { static const IOREG_BGnParameter debugParams = {256, 0, 0, -77, 0, lineIndex*GPU_FRAMEBUFFER_NATIVE_WIDTH}; - this->_RenderLine_BGAffine<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 *)dstColorLine, lineIndex, debugParams); + this->_RenderLine_BGAffine<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 
*)dstColorLine, lineIndex, debugParams); } else { IOREG_BGnParameter *__restrict bgParams = (srcLayerID == GPULayerID_BG2) ? (IOREG_BGnParameter *)&this->_IORegisterMap->BG2Param : (IOREG_BGnParameter *)&this->_IORegisterMap->BG3Param; - this->_RenderLine_BGAffine<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 *)dstColorLine, lineIndex, *bgParams); + this->_RenderLine_BGAffine<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 *)dstColorLine, lineIndex, *bgParams); bgParams->BGnX.value += bgParams->BGnPB.value; bgParams->BGnY.value += bgParams->BGnPD.value; } } -template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void* GPUEngineBase::_LineExtRot(const GPULayerID srcLayerID, void *__restrict dstColorLine, const u16 lineIndex, bool &outUseCustomVRAM) { if (ISDEBUGRENDER) { static const IOREG_BGnParameter debugParams = {256, 0, 0, -77, 0, lineIndex*GPU_FRAMEBUFFER_NATIVE_WIDTH}; - return this->_RenderLine_BGExtended<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 *)dstColorLine, lineIndex, debugParams, outUseCustomVRAM); + return this->_RenderLine_BGExtended<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 *)dstColorLine, lineIndex, debugParams, outUseCustomVRAM); } else { IOREG_BGnParameter *__restrict bgParams = (srcLayerID == GPULayerID_BG2) ? 
(IOREG_BGnParameter *)&this->_IORegisterMap->BG2Param : (IOREG_BGnParameter *)&this->_IORegisterMap->BG3Param; - dstColorLine = this->_RenderLine_BGExtended<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 *)dstColorLine, lineIndex, *bgParams, outUseCustomVRAM); + dstColorLine = this->_RenderLine_BGExtended<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, (u16 *)dstColorLine, lineIndex, *bgParams, outUseCustomVRAM); bgParams->BGnX.value += bgParams->BGnPB.value; bgParams->BGnY.value += bgParams->BGnPD.value; @@ -3454,7 +3453,7 @@ /* if i understand it correct, and it fixes some sprite problems in chameleon shot */ /* we have a 15 bit color, and should use the pal entry bits as alpha ?*/ /* http://nocash.emubase.de/gbatek.htm#dsvideoobjs */ -template<bool ISDEBUGRENDER> +template <bool ISDEBUGRENDER> void GPUEngineBase::_RenderSpriteBMP(const u8 spriteNum, const u16 l, u16 *__restrict dst, const u32 srcadr, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab, const u8 prio, const size_t lg, size_t sprX, size_t x, const s32 xdir, const u8 alpha) { const u16 *__restrict bmpBuffer = (u16 *)MMU_gpu_map(srcadr); @@ -4071,7 +4070,7 @@ void *currentRenderLineTarget = (this->_displayOutputMode == GPUDisplayMode_Normal) ? 
(u8 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH * dispInfo.pixelBytes) : (u8 *)this->_internalRenderLineTargetNative; const u16 backdropColor = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF; - this->_RenderLine_Clear(backdropColor, l, currentRenderLineTarget); + this->_RenderLine_Clear<OUTPUTFORMAT>(backdropColor, l, currentRenderLineTarget); // for all the pixels in the line if (this->_enableLayer[GPULayerID_OBJ]) @@ -4104,11 +4103,11 @@ if (this->isLineRenderNative[l]) { - currentRenderLineTarget = this->_RenderLine_LayerBG<false, false>(layerID, currentRenderLineTarget, l); + currentRenderLineTarget = this->_RenderLine_LayerBG<OUTPUTFORMAT, false, false>(layerID, currentRenderLineTarget, l); } else { - currentRenderLineTarget = this->_RenderLine_LayerBG<false, true>(layerID, currentRenderLineTarget, l); + currentRenderLineTarget = this->_RenderLine_LayerBG<OUTPUTFORMAT, false, true>(layerID, currentRenderLineTarget, l); } } //layer enabled } @@ -4260,10 +4259,9 @@ return dstColorLine; } -template <bool ISFULLINTENSITYHINT> +template <NDSColorFormat OUTPUTFORMAT, bool ISFULLINTENSITYHINT> void GPUEngineBase::ApplyMasterBrightness() { - const NDSColorFormat outputFormat = GPU->GetDisplayInfo().colorFormat; const IOREG_MASTER_BRIGHT &MASTER_BRIGHT = this->_IORegisterMap->MASTER_BRIGHT; const u32 intensity = MASTER_BRIGHT.Intensity; @@ -4283,7 +4281,7 @@ { size_t i = 0; - switch (outputFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: { @@ -4294,7 +4292,7 @@ for (; i < ssePixCount; i += 8) { __m128i dstColor_vec128 = _mm_load_si128((__m128i *)((u16 *)dst + i)); - dstColor_vec128 = this->_ColorEffectIncreaseBrightness<NDSColorFormat_BGR555_Rev>(dstColor_vec128, intensity_vec128); + dstColor_vec128 = this->_ColorEffectIncreaseBrightness<OUTPUTFORMAT>(dstColor_vec128, intensity_vec128); dstColor_vec128 = _mm_or_si128(dstColor_vec128, _mm_set1_epi16(0x8000)); _mm_store_si128((__m128i *)((u16 *)dst + i), dstColor_vec128); } @@ -4311,31 +4309,6 
@@ } case NDSColorFormat_BGR666_Rev: - { -#ifdef ENABLE_SSE2 - const __m128i intensity_vec128 = _mm_set1_epi16(intensity); - - const size_t ssePixCount = pixCount - (pixCount % 4); - for (; i < ssePixCount; i += 4) - { - __m128i dstColor_vec128 = _mm_load_si128((__m128i *)((FragmentColor *)dst + i)); - dstColor_vec128 = this->_ColorEffectIncreaseBrightness<NDSColorFormat_BGR666_Rev>(dstColor_vec128, intensity_vec128); - dstColor_vec128 = _mm_or_si128(dstColor_vec128, _mm_set1_epi32(0x1F000000)); - _mm_store_si128((__m128i *)((FragmentColor *)dst + i), dstColor_vec128); - } -#endif - -#ifdef ENABLE_SSE2 -#pragma LOOPVECTORIZE_DISABLE -#endif - for (; i < pixCount; i++) - { - ((FragmentColor *)dst)[i] = this->_ColorEffectIncreaseBrightness<NDSColorFormat_BGR666_Rev>(((FragmentColor *)dst)[i], intensity); - ((FragmentColor *)dst)[i].a = 0x1F; - } - break; - } - case NDSColorFormat_BGR888_Rev: { #ifdef ENABLE_SSE2 @@ -4345,8 +4318,8 @@ for (; i < ssePixCount; i += 4) { __m128i dstColor_vec128 = _mm_load_si128((__m128i *)((FragmentColor *)dst + i)); - dstColor_vec128 = this->_ColorEffectIncreaseBrightness<NDSColorFormat_BGR888_Rev>(dstColor_vec128, intensity_vec128); - dstColor_vec128 = _mm_or_si128(dstColor_vec128, _mm_set1_epi32(0xFF000000)); + dstColor_vec128 = this->_ColorEffectIncreaseBrightness<OUTPUTFORMAT>(dstColor_vec128, intensity_vec128); + dstColor_vec128 = _mm_or_si128(dstColor_vec128, _mm_set1_epi32((OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 
0x1F000000 : 0xFF000000)); _mm_store_si128((__m128i *)((FragmentColor *)dst + i), dstColor_vec128); } #endif @@ -4356,8 +4329,8 @@ #endif for (; i < pixCount; i++) { - ((FragmentColor *)dst)[i] = this->_ColorEffectIncreaseBrightness<NDSColorFormat_BGR888_Rev>(((FragmentColor *)dst)[i], intensity); - ((FragmentColor *)dst)[i].a = 0xFF; + ((FragmentColor *)dst)[i] = this->_ColorEffectIncreaseBrightness<OUTPUTFORMAT>(((FragmentColor *)dst)[i], intensity); + ((FragmentColor *)dst)[i].a = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F : 0xFF; } break; } @@ -4369,7 +4342,7 @@ else { // all white (optimization) - switch (outputFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: memset_u16(dst, 0xFFFF, pixCount); @@ -4396,7 +4369,7 @@ { size_t i = 0; - switch (outputFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: { @@ -4407,7 +4380,7 @@ for (; i < ssePixCount; i += 8) { __m128i dstColor_vec128 = _mm_load_si128((__m128i *)((u16 *)dst + i)); - dstColor_vec128 = this->_ColorEffectDecreaseBrightness<NDSColorFormat_BGR555_Rev>(dstColor_vec128, intensity_vec128); + dstColor_vec128 = this->_ColorEffectDecreaseBrightness<OUTPUTFORMAT>(dstColor_vec128, intensity_vec128); dstColor_vec128 = _mm_or_si128(dstColor_vec128, _mm_set1_epi16(0x8000)); _mm_store_si128((__m128i *)((u16 *)dst + i), dstColor_vec128); } @@ -4424,31 +4397,6 @@ } case NDSColorFormat_BGR666_Rev: - { -#ifdef ENABLE_SSE2 - const __m128i intensity_vec128 = _mm_set1_epi16(intensity); - - const size_t ssePixCount = pixCount - (pixCount % 4); - for (; i < ssePixCount; i += 4) - { - __m128i dstColor_vec128 = _mm_load_si128((__m128i *)((FragmentColor *)dst + i)); - dstColor_vec128 = this->_ColorEffectDecreaseBrightness<NDSColorFormat_BGR666_Rev>(dstColor_vec128, intensity_vec128); - dstColor_vec128 = _mm_or_si128(dstColor_vec128, _mm_set1_epi32(0x1F000000)); - _mm_store_si128((__m128i *)((FragmentColor *)dst + i), dstColor_vec128); - } -#endif - -#ifdef ENABLE_SSE2 -#pragma 
LOOPVECTORIZE_DISABLE -#endif - for (; i < pixCount; i++) - { - ((FragmentColor *)dst)[i] = this->_ColorEffectDecreaseBrightness(((FragmentColor *)dst)[i], intensity); - ((FragmentColor *)dst)[i].a = 0x1F; - } - break; - } - case NDSColorFormat_BGR888_Rev: { #ifdef ENABLE_SSE2 @@ -4458,8 +4406,8 @@ for (; i < ssePixCount; i += 4) { __m128i dstColor_vec128 = _mm_load_si128((__m128i *)((FragmentColor *)dst + i)); - dstColor_vec128 = this->_ColorEffectDecreaseBrightness<NDSColorFormat_BGR888_Rev>(dstColor_vec128, intensity_vec128); - dstColor_vec128 = _mm_or_si128(dstColor_vec128, _mm_set1_epi32(0xFF000000)); + dstColor_vec128 = this->_ColorEffectDecreaseBrightness<OUTPUTFORMAT>(dstColor_vec128, intensity_vec128); + dstColor_vec128 = _mm_or_si128(dstColor_vec128, _mm_set1_epi32((OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 0x1F000000 : 0xFF000000)); _mm_store_si128((__m128i *)((FragmentColor *)dst + i), dstColor_vec128); } #endif @@ -4470,7 +4418,7 @@ for (; i < pixCount; i++) { ((FragmentColor *)dst)[i] = this->_ColorEffectDecreaseBrightness(((FragmentColor *)dst)[i], intensity); - ((FragmentColor *)dst)[i].a = 0xFF; + ((FragmentColor *)dst)[i].a = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 
0x1F : 0xFF; } break; } @@ -4482,7 +4430,7 @@ else { // all black (optimization) - switch (outputFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: memset_u16(dst, 0x8000, pixCount); @@ -4612,17 +4560,17 @@ } } -template <bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void* GPUEngineBase::_RenderLine_LayerBG_Final(const GPULayerID srcLayerID, void *dstColorLine, const u16 lineIndex) { bool useCustomVRAM = false; switch (this->_BGLayer[srcLayerID].baseType) { - case BGType_Text: this->_LineText<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); break; - case BGType_Affine: this->_LineRot<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); break; - case BGType_AffineExt: dstColorLine = this->_LineExtRot<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex, useCustomVRAM); break; - case BGType_Large8bpp: dstColorLine = this->_LineExtRot<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex, useCustomVRAM); break; + case BGType_Text: this->_LineText<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); break; + case BGType_Affine: this->_LineRot<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); break; + case BGType_AffineExt: dstColorLine = this->_LineExtRot<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, 
COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex, useCustomVRAM); break; + case BGType_Large8bpp: dstColorLine = this->_LineExtRot<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex, useCustomVRAM); break; case BGType_Invalid: PROGINFO("Attempting to render an invalid BG type\n"); break; @@ -4637,73 +4585,73 @@ { if (useCustomVRAM) { - this->_RenderPixelsCustomVRAM<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDCustom, lineIndex); + this->_RenderPixelsCustomVRAM<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDCustom, lineIndex); } else { - this->_RenderPixelsCustom<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDCustom, lineIndex); + this->_RenderPixelsCustom<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDCustom, lineIndex); } } return dstColorLine; } -template <bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void* GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(const GPULayerID srcLayerID, void *dstColorLine, const u16 lineIndex) { - return this->_RenderLine_LayerBG_Final<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); + return this->_RenderLine_LayerBG_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, 
lineIndex); } -template <bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void* GPUEngineBase::_RenderLine_LayerBG_ApplyNoWindowsEnabledHint(const GPULayerID srcLayerID, void *dstColorLine, const u16 lineIndex) { const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; if (BLDCNT.ColorEffect == ColorEffect_Disable) { - dstColorLine = this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, true, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); + dstColorLine = this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, true, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); } else { - dstColorLine = this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, false, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); + dstColorLine = this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, false, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); } return dstColorLine; } -template <bool ISDEBUGRENDER, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void* GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(const GPULayerID srcLayerID, void *dstColorLine, const u16 lineIndex) { if (this->_isAnyWindowEnabled) { - dstColorLine = this->_RenderLine_LayerBG_ApplyNoWindowsEnabledHint<ISDEBUGRENDER, MOSAIC, false, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); + dstColorLine = this->_RenderLine_LayerBG_ApplyNoWindowsEnabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, false, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); } else { - dstColorLine = 
this->_RenderLine_LayerBG_ApplyNoWindowsEnabledHint<ISDEBUGRENDER, MOSAIC, true, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); + dstColorLine = this->_RenderLine_LayerBG_ApplyNoWindowsEnabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, true, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); } return dstColorLine; } -template <bool ISDEBUGRENDER, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool ISCUSTOMRENDERINGNEEDED> void* GPUEngineBase::_RenderLine_LayerBG(const GPULayerID srcLayerID, void *dstColorLine, const u16 lineIndex) { if (ISDEBUGRENDER) { - return this->_RenderLine_LayerBG_Final<ISDEBUGRENDER, false, true, true, false>(srcLayerID, dstColorLine, lineIndex); + return this->_RenderLine_LayerBG_Final<OUTPUTFORMAT, ISDEBUGRENDER, false, true, true, false>(srcLayerID, dstColorLine, lineIndex); } else { #ifndef DISABLE_MOSAIC if (this->_BGLayer[srcLayerID].isMosaic && this->_isBGMosaicSet) { - dstColorLine = this->_RenderLine_LayerBG_ApplyMosaic<ISDEBUGRENDER, true, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); + dstColorLine = this->_RenderLine_LayerBG_ApplyMosaic<OUTPUTFORMAT, ISDEBUGRENDER, true, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); } else #endif { - dstColorLine = this->_RenderLine_LayerBG_ApplyMosaic<ISDEBUGRENDER, false, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); + dstColorLine = this->_RenderLine_LayerBG_ApplyMosaic<OUTPUTFORMAT, ISDEBUGRENDER, false, ISCUSTOMRENDERINGNEEDED>(srcLayerID, dstColorLine, lineIndex); } } @@ -4718,17 +4666,18 @@ for (size_t lineIndex = 0; lineIndex < layerHeight; lineIndex++) { - this->_RenderLine_LayerBG<true, false>(layerID, dstColorLine, lineIndex); + this->_RenderLine_LayerBG<NDSColorFormat_BGR555_Rev, true, false>(layerID, dstColorLine, lineIndex); dstColorLine += layerWidth; } } +template <NDSColorFormat OUTPUTFORMAT> void GPUEngineBase::_HandleDisplayModeOff(const size_t l) { // Native 
rendering only. // In this display mode, the display is cleared to white. - switch (GPU->GetDisplayInfo().colorFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>((u16 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH), 0xFFFF); @@ -4744,6 +4693,7 @@ } } +template <NDSColorFormat OUTPUTFORMAT> void GPUEngineBase::_HandleDisplayModeNormal(const size_t l) { if (!this->isLineRenderNative[l]) @@ -5324,6 +5274,7 @@ return didVRAMLineChange; } +template <NDSColorFormat OUTPUTFORMAT> void GPUEngineA::RenderLine(const u16 l) { const IOREG_DISPCAPCNT &DISPCAPCNT = this->_IORegisterMap->DISPCAPCNT; @@ -5336,25 +5287,25 @@ this->_SetupWindows<1>(l); // Render the line - void *renderLineTarget = this->_RenderLine_Layers<NDSColorFormat_BGR555_Rev>(l); + void *renderLineTarget = this->_RenderLine_Layers<OUTPUTFORMAT>(l); // Fill the display output switch (this->_displayOutputMode) { case GPUDisplayMode_Off: // Display Off(Display white) - this->_HandleDisplayModeOff(l); + this->_HandleDisplayModeOff<OUTPUTFORMAT>(l); break; case GPUDisplayMode_Normal: // Display BG and OBJ layers - this->_HandleDisplayModeNormal(l); + this->_HandleDisplayModeNormal<OUTPUTFORMAT>(l); break; case GPUDisplayMode_VRAM: // Display vram framebuffer - this->_HandleDisplayModeVRAM(l); + this->_HandleDisplayModeVRAM<OUTPUTFORMAT>(l); break; case GPUDisplayMode_MainMemory: // Display memory FIFO - this->_HandleDisplayModeMainMemory(l); + this->_HandleDisplayModeMainMemory<OUTPUTFORMAT>(l); break; } @@ -5367,11 +5318,11 @@ { if (DISPCAPCNT.CaptureSize == DisplayCaptureSize_128x128) { - this->_RenderLine_DisplayCapture<GPU_FRAMEBUFFER_NATIVE_WIDTH/2>(renderLineTarget, l); + this->_RenderLine_DisplayCapture<OUTPUTFORMAT, GPU_FRAMEBUFFER_NATIVE_WIDTH/2>(renderLineTarget, l); } else { - this->_RenderLine_DisplayCapture<GPU_FRAMEBUFFER_NATIVE_WIDTH>(renderLineTarget, l); + this->_RenderLine_DisplayCapture<OUTPUTFORMAT, 
GPU_FRAMEBUFFER_NATIVE_WIDTH>(renderLineTarget, l); } } } @@ -5596,7 +5547,7 @@ return dstColorLine; } -template<size_t CAPTURELENGTH> +template<NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH> void GPUEngineA::_RenderLine_DisplayCapture(const void *renderedLineSrcA, const u16 l) { assert( (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH/2) || (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH) ); @@ -5646,7 +5597,7 @@ if (DISPCAPCNT.SrcA == 0) { - switch (dispInfo.colorFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: break; @@ -6550,6 +6501,7 @@ } } +template <NDSColorFormat OUTPUTFORMAT> void GPUEngineA::_HandleDisplayModeVRAM(const size_t l) { const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; @@ -6557,7 +6509,7 @@ if (this->isLineCaptureNative[DISPCNT.VRAM_Block][l]) { - switch (GPU->GetDisplayInfo().colorFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: this->_LineColorCopy<true, true, true, true, 2>(this->nativeBuffer, this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block], l); @@ -6585,7 +6537,7 @@ const size_t customWidth = GPU->GetDisplayInfo().customWidth; const size_t customPixCount = customWidth * _gpuDstLineCount[l]; - switch (GPU->GetDisplayInfo().colorFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: this->_LineColorCopy<false, false, true, true, 2>((u16 *)this->customBuffer, this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block], l); @@ -6613,6 +6565,7 @@ } } +template <NDSColorFormat OUTPUTFORMAT> void GPUEngineA::_HandleDisplayModeMainMemory(const size_t l) { // Native rendering only. 
@@ -6622,7 +6575,7 @@ u32 *dstColorLine = (u32 *)((u16 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH)); - switch (GPU->GetDisplayInfo().colorFormat) + switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: { @@ -6768,6 +6721,7 @@ this->SetDisplayByID(NDSDisplayID_Touch); } +template <NDSColorFormat OUTPUTFORMAT> void GPUEngineB::RenderLine(const u16 l) { //cache some parameters which are assumed to be stable throughout the rendering of the entire line @@ -6780,12 +6734,12 @@ switch (this->_displayOutputMode) { case GPUDisplayMode_Off: // Display Off(Display white) - this->_HandleDisplayModeOff(l); + this->_HandleDisplayModeOff<OUTPUTFORMAT>(l); break; case GPUDisplayMode_Normal: // Display BG and OBJ layers - this->_RenderLine_Layers<NDSColorFormat_BGR555_Rev>(l); - this->_HandleDisplayModeNormal(l); + this->_RenderLine_Layers<OUTPUTFORMAT>(l); + this->_HandleDisplayModeNormal<OUTPUTFORMAT>(l); break; default: @@ -7208,10 +7162,7 @@ void GPUSubsystem::SetColorFormat(const NDSColorFormat outputFormat, void *clientNativeBuffer, void *clientCustomBuffer) { - // TBD: Multiple color formats aren't supported in the renderer yet. Force the color format to NDSColorFormat_BGR555_Rev until then. - //this->_displayInfo.colorFormat = outputFormat; - this->_displayInfo.colorFormat = NDSColorFormat_BGR555_Rev; - + this->_displayInfo.colorFormat = outputFormat; this->_displayInfo.pixelBytes = (outputFormat == NDSColorFormat_BGR555_Rev) ? 
sizeof(u16) : sizeof(FragmentColor); this->_AllocateFramebuffers(this->_displayInfo.colorFormat, this->_displayInfo.customWidth, this->_displayInfo.customHeight, clientNativeBuffer, clientCustomBuffer); @@ -7345,6 +7296,7 @@ this->_willAutoResolveToCustomBuffer = willAutoResolve; } +template <NDSColorFormat OUTPUTFORMAT> void GPUSubsystem::RenderLine(const u16 l, bool isFrameSkipRequested) { const bool isFramebufferRenderNeeded[2] = {(CommonSettings.showGpu.main && !this->_engineMain->GetIsMasterBrightFullIntensity()) || (this->_engineMain->GetIORegisterMap().DISPCAPCNT.CaptureEnable != 0), @@ -7367,7 +7319,7 @@ } else if (this->_engineMain->GetIsMasterBrightFullIntensity()) { - this->_engineMain->ApplyMasterBrightness<true>(); + this->_engineMain->ApplyMasterBrightness<OUTPUTFORMAT, true>(); } } @@ -7379,7 +7331,7 @@ } else if (this->_engineSub->GetIsMasterBrightFullIntensity()) { - this->_engineSub->ApplyMasterBrightness<true>(); + this->_engineSub->ApplyMasterBrightness<OUTPUTFORMAT, true>(); } } } @@ -7411,7 +7363,7 @@ } } - this->_engineMain->RenderLine(l); + this->_engineMain->RenderLine<OUTPUTFORMAT>(l); } else { @@ -7420,7 +7372,7 @@ if (isFramebufferRenderNeeded[GPUEngineID_Sub] && !isFrameSkipRequested) { - this->_engineSub->RenderLine(l); + this->_engineSub->RenderLine<OUTPUTFORMAT>(l); } else { @@ -7449,15 +7401,15 @@ if (isFramebufferRenderNeeded[GPUEngineID_Main]) { - this->_engineMain->ApplyMasterBrightness<false>(); + this->_engineMain->ApplyMasterBrightness<OUTPUTFORMAT, false>(); } if (isFramebufferRenderNeeded[GPUEngineID_Sub]) { - this->_engineSub->ApplyMasterBrightness<false>(); + this->_engineSub->ApplyMasterBrightness<OUTPUTFORMAT, false>(); } - if (this->_displayInfo.colorFormat == NDSColorFormat_BGR666_Rev) + if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { this->_engineMain->ResolveRGB666ToRGB888(); this->_engineSub->ResolveRGB666ToRGB888(); @@ -7754,6 +7706,10 @@ template void GPUEngineBase::ParseReg_BGnX<GPULayerID_BG3>(); template void 
GPUEngineBase::ParseReg_BGnY<GPULayerID_BG3>(); +template void GPUSubsystem::RenderLine<NDSColorFormat_BGR555_Rev>(const u16 l, bool skip); +template void GPUSubsystem::RenderLine<NDSColorFormat_BGR666_Rev>(const u16 l, bool skip); +template void GPUSubsystem::RenderLine<NDSColorFormat_BGR888_Rev>(const u16 l, bool skip); + template void ConvertColorBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ConvertColorBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ConvertColorBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); Modified: trunk/desmume/src/GPU.h =================================================================== --- trunk/desmume/src/GPU.h 2016-07-15 16:29:08 UTC (rev 5506) +++ trunk/desmume/src/GPU.h 2016-07-15 20:30:05 UTC (rev 5507) @@ -1120,7 +1120,6 @@ // User-requested settings. These fields will always remain constant until changed. // Changed by calling GPUSubsystem::SetColorFormat(). - // TBD: The color format will always be 2-byte NDSColorFormat_BGR555_Rev until internal rendering in multiple formats is fully supported. NDSColorFormat colorFormat; // The output color format. size_t pixelBytes; // The number of bytes per pixel. 
@@ -1323,40 +1322,40 @@ void _MosaicSpriteLinePixel(const size_t x, u16 l, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab); void _MosaicSpriteLine(u16 l, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab); - template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> void _RenderPixelIterate_Final(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal); - template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> void _RenderPixelIterate_ApplyWrap(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal); - template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> void _RenderPixelIterate(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> void _RenderPixelIterate_Final(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> 
void _RenderPixelIterate_ApplyWrap(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal); + template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, b... [truncated message content] |
From: <rog...@us...> - 2016-07-15 16:29:11
|
Revision: 5506 http://sourceforge.net/p/desmume/code/5506 Author: rogerman Date: 2016-07-15 16:29:08 +0000 (Fri, 15 Jul 2016) Log Message: ----------- GPU: - Parse and cache the WININ and WINOUT registers, instead of using them directly. - Parse and cache the Target1 bits of the BLDCNT register. - Remove some template parameters which are now suspected to no longer improve performance, most notably LAYERID. Should significantly reduce the generated code size. Modified Paths: -------------- trunk/desmume/src/GPU.cpp trunk/desmume/src/GPU.h trunk/desmume/src/MMU.cpp trunk/desmume/src/windows/mapView.cpp Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-14 05:49:47 UTC (rev 5505) +++ trunk/desmume/src/GPU.cpp 2016-07-15 16:29:08 UTC (rev 5506) @@ -272,8 +272,8 @@ //subEngine->refreshAffineStartRegs(-1,-1); } - mainEngine->ParseAllRegisters<GPUEngineID_Main>(); - subEngine->ParseAllRegisters<GPUEngineID_Sub>(); + mainEngine->ParseAllRegisters(); + subEngine->ParseAllRegisters(); return !is->fail(); } @@ -514,24 +514,97 @@ this->_currentFadeInColors = &GPUEngineBase::_fadeInColors[this->_BLDALPHA_EVY][0]; this->_currentFadeOutColors = &GPUEngineBase::_fadeOutColors[this->_BLDALPHA_EVY][0]; - this->_blend2[GPULayerID_BG0] = false; - this->_blend2[GPULayerID_BG1] = false; - this->_blend2[GPULayerID_BG2] = false; - this->_blend2[GPULayerID_BG3] = false; - this->_blend2[GPULayerID_OBJ] = false; - this->_blend2[GPULayerID_Backdrop] = false; + this->_srcBlendEnable[GPULayerID_BG0] = false; + this->_srcBlendEnable[GPULayerID_BG1] = false; + this->_srcBlendEnable[GPULayerID_BG2] = false; + this->_srcBlendEnable[GPULayerID_BG3] = false; + this->_srcBlendEnable[GPULayerID_OBJ] = false; + this->_srcBlendEnable[GPULayerID_Backdrop] = false; -#if defined(ENABLE_SSSE3) - this->_blend2_SSSE3 = _mm_setzero_si128(); -#elif defined(ENABLE_SSE2) - this->_blend2_SSE2[GPULayerID_BG0] = 
_mm_setzero_si128(); - this->_blend2_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); - this->_blend2_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); - this->_blend2_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); - this->_blend2_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); - this->_blend2_SSE2[GPULayerID_Backdrop] = _mm_setzero_si128(); + this->_dstBlendEnable[GPULayerID_BG0] = false; + this->_dstBlendEnable[GPULayerID_BG1] = false; + this->_dstBlendEnable[GPULayerID_BG2] = false; + this->_dstBlendEnable[GPULayerID_BG3] = false; + this->_dstBlendEnable[GPULayerID_OBJ] = false; + this->_dstBlendEnable[GPULayerID_Backdrop] = false; + +#ifdef ENABLE_SSE2 + this->_srcBlendEnable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + this->_srcBlendEnable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + this->_srcBlendEnable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + this->_srcBlendEnable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + this->_srcBlendEnable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + this->_srcBlendEnable_SSE2[GPULayerID_Backdrop] = _mm_setzero_si128(); +#ifdef ENABLE_SSSE3 + this->_dstBlendEnable_SSSE3 = _mm_setzero_si128(); +#else + this->_dstBlendEnable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + this->_dstBlendEnable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + this->_dstBlendEnable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + this->_dstBlendEnable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + this->_dstBlendEnable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + this->_dstBlendEnable_SSE2[GPULayerID_Backdrop] = _mm_setzero_si128(); #endif +#endif + this->_WIN0_enable[GPULayerID_BG0] = false; + this->_WIN0_enable[GPULayerID_BG1] = false; + this->_WIN0_enable[GPULayerID_BG2] = false; + this->_WIN0_enable[GPULayerID_BG3] = false; + this->_WIN0_enable[GPULayerID_OBJ] = false; + this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG] = false; + + this->_WIN1_enable[GPULayerID_BG0] = false; + this->_WIN1_enable[GPULayerID_BG1] = false; + this->_WIN1_enable[GPULayerID_BG2] = false; + 
this->_WIN1_enable[GPULayerID_BG3] = false; + this->_WIN1_enable[GPULayerID_OBJ] = false; + this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG] = false; + + this->_WINOUT_enable[GPULayerID_BG0] = false; + this->_WINOUT_enable[GPULayerID_BG1] = false; + this->_WINOUT_enable[GPULayerID_BG2] = false; + this->_WINOUT_enable[GPULayerID_BG3] = false; + this->_WINOUT_enable[GPULayerID_OBJ] = false; + this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG] = false; + + this->_WINOBJ_enable[GPULayerID_BG0] = false; + this->_WINOBJ_enable[GPULayerID_BG1] = false; + this->_WINOBJ_enable[GPULayerID_BG2] = false; + this->_WINOBJ_enable[GPULayerID_BG3] = false; + this->_WINOBJ_enable[GPULayerID_OBJ] = false; + this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG] = false; + +#if defined(ENABLE_SSE2) + this->_WIN0_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + this->_WIN0_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + this->_WIN0_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + this->_WIN0_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + this->_WIN0_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + this->_WIN0_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); + + this->_WIN1_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + this->_WIN1_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + this->_WIN1_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + this->_WIN1_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + this->_WIN1_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + this->_WIN1_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); + + this->_WINOUT_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + this->_WINOUT_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + this->_WINOUT_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + this->_WINOUT_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + this->_WINOUT_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + this->_WINOUT_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); + + 
this->_WINOBJ_enable_SSE2[GPULayerID_BG0] = _mm_setzero_si128(); + this->_WINOBJ_enable_SSE2[GPULayerID_BG1] = _mm_setzero_si128(); + this->_WINOBJ_enable_SSE2[GPULayerID_BG2] = _mm_setzero_si128(); + this->_WINOBJ_enable_SSE2[GPULayerID_BG3] = _mm_setzero_si128(); + this->_WINOBJ_enable_SSE2[GPULayerID_OBJ] = _mm_setzero_si128(); + this->_WINOBJ_enable_SSE2[WINDOWCONTROL_EFFECTFLAG] = _mm_setzero_si128(); +#endif + this->_isMasterBrightFullIntensity = false; this->_spriteRenderMode = SpriteRenderMode_Sprite1D; @@ -1062,11 +1135,10 @@ } //Sets up LCD control variables for Display Engines A and B for quick reading -template<GPUEngineID ENGINEID> void GPUEngineBase::ParseReg_DISPCNT() { const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; - this->_displayOutputMode = (ENGINEID == GPUEngineID_Main) ? (GPUDisplayMode)DISPCNT.DisplayMode : (GPUDisplayMode)(DISPCNT.DisplayMode & 0x01); + this->_displayOutputMode = (this->_engineID == GPUEngineID_Main) ? (GPUDisplayMode)DISPCNT.DisplayMode : (GPUDisplayMode)(DISPCNT.DisplayMode & 0x01); this->_WIN0_ENABLED = (DISPCNT.Win0_Enable != 0); this->_WIN1_ENABLED = (DISPCNT.Win1_Enable != 0); @@ -1093,53 +1165,52 @@ this->_spriteRenderMode = SpriteRenderMode_Sprite2D; } - if (DISPCNT.OBJ_BMP_1D_Bound && (ENGINEID == GPUEngineID_Main)) + if (DISPCNT.OBJ_BMP_1D_Bound && (this->_engineID == GPUEngineID_Main)) this->_sprBMPBoundary = 8; else this->_sprBMPBoundary = 7; - this->ParseReg_BGnCNT<ENGINEID, GPULayerID_BG3>(); - this->ParseReg_BGnCNT<ENGINEID, GPULayerID_BG2>(); - this->ParseReg_BGnCNT<ENGINEID, GPULayerID_BG1>(); - this->ParseReg_BGnCNT<ENGINEID, GPULayerID_BG0>(); + this->ParseReg_BGnCNT(GPULayerID_BG3); + this->ParseReg_BGnCNT(GPULayerID_BG2); + this->ParseReg_BGnCNT(GPULayerID_BG1); + this->ParseReg_BGnCNT(GPULayerID_BG0); } -template <GPUEngineID ENGINEID, GPULayerID LAYERID> -void GPUEngineBase::ParseReg_BGnCNT() +void GPUEngineBase::ParseReg_BGnCNT(const GPULayerID layerID) { const IOREG_DISPCNT &DISPCNT = 
this->_IORegisterMap->DISPCNT; - const IOREG_BGnCNT &BGnCNT = this->_IORegisterMap->BGnCNT[LAYERID]; - this->_BGLayer[LAYERID].BGnCNT = BGnCNT; + const IOREG_BGnCNT &BGnCNT = this->_IORegisterMap->BGnCNT[layerID]; + this->_BGLayer[layerID].BGnCNT = BGnCNT; - switch (LAYERID) + switch (layerID) { - case GPULayerID_BG0: this->_BGLayer[LAYERID].isVisible = (DISPCNT.BG0_Enable != 0); break; - case GPULayerID_BG1: this->_BGLayer[LAYERID].isVisible = (DISPCNT.BG1_Enable != 0); break; - case GPULayerID_BG2: this->_BGLayer[LAYERID].isVisible = (DISPCNT.BG2_Enable != 0); break; - case GPULayerID_BG3: this->_BGLayer[LAYERID].isVisible = (DISPCNT.BG3_Enable != 0); break; + case GPULayerID_BG0: this->_BGLayer[layerID].isVisible = (DISPCNT.BG0_Enable != 0); break; + case GPULayerID_BG1: this->_BGLayer[layerID].isVisible = (DISPCNT.BG1_Enable != 0); break; + case GPULayerID_BG2: this->_BGLayer[layerID].isVisible = (DISPCNT.BG2_Enable != 0); break; + case GPULayerID_BG3: this->_BGLayer[layerID].isVisible = (DISPCNT.BG3_Enable != 0); break; default: break; } - if (ENGINEID == GPUEngineID_Main) + if (this->_engineID == GPUEngineID_Main) { - this->_BGLayer[LAYERID].largeBMPAddress = MMU_ABG; - this->_BGLayer[LAYERID].BMPAddress = MMU_ABG + (BGnCNT.ScreenBase_Block * ADDRESS_STEP_16KB); - this->_BGLayer[LAYERID].tileMapAddress = MMU_ABG + (DISPCNT.ScreenBase_Block * ADDRESS_STEP_64KB) + (BGnCNT.ScreenBase_Block * ADDRESS_STEP_2KB); - this->_BGLayer[LAYERID].tileEntryAddress = MMU_ABG + (DISPCNT.CharacBase_Block * ADDRESS_STEP_64KB) + (BGnCNT.CharacBase_Block * ADDRESS_STEP_16KB); + this->_BGLayer[layerID].largeBMPAddress = MMU_ABG; + this->_BGLayer[layerID].BMPAddress = MMU_ABG + (BGnCNT.ScreenBase_Block * ADDRESS_STEP_16KB); + this->_BGLayer[layerID].tileMapAddress = MMU_ABG + (DISPCNT.ScreenBase_Block * ADDRESS_STEP_64KB) + (BGnCNT.ScreenBase_Block * ADDRESS_STEP_2KB); + this->_BGLayer[layerID].tileEntryAddress = MMU_ABG + (DISPCNT.CharacBase_Block * ADDRESS_STEP_64KB) + 
(BGnCNT.CharacBase_Block * ADDRESS_STEP_16KB); } else { - this->_BGLayer[LAYERID].largeBMPAddress = MMU_BBG; - this->_BGLayer[LAYERID].BMPAddress = MMU_BBG + (BGnCNT.ScreenBase_Block * ADDRESS_STEP_16KB); - this->_BGLayer[LAYERID].tileMapAddress = MMU_BBG + (BGnCNT.ScreenBase_Block * ADDRESS_STEP_2KB); - this->_BGLayer[LAYERID].tileEntryAddress = MMU_BBG + (BGnCNT.CharacBase_Block * ADDRESS_STEP_16KB); + this->_BGLayer[layerID].largeBMPAddress = MMU_BBG; + this->_BGLayer[layerID].BMPAddress = MMU_BBG + (BGnCNT.ScreenBase_Block * ADDRESS_STEP_16KB); + this->_BGLayer[layerID].tileMapAddress = MMU_BBG + (BGnCNT.ScreenBase_Block * ADDRESS_STEP_2KB); + this->_BGLayer[layerID].tileEntryAddress = MMU_BBG + (BGnCNT.CharacBase_Block * ADDRESS_STEP_16KB); } //clarify affine ext modes - BGType mode = GPUEngineBase::_mode2type[DISPCNT.BG_Mode][LAYERID]; - this->_BGLayer[LAYERID].baseType = mode; + BGType mode = GPUEngineBase::_mode2type[DISPCNT.BG_Mode][layerID]; + this->_BGLayer[layerID].baseType = mode; if (mode == BGType_AffineExt) { @@ -1162,20 +1233,20 @@ // Extended palette slots can be changed for BG0 and BG1, but BG2 and BG3 remain constant. // Display wrapping can be changed for BG2 and BG3, but BG0 and BG1 cannot wrap. 
- if (LAYERID == GPULayerID_BG0 || LAYERID == GPULayerID_BG1) + if (layerID == GPULayerID_BG0 || layerID == GPULayerID_BG1) { - this->_BGLayer[LAYERID].extPaletteSlot = (BGnCNT.PaletteSet_Wrap * 2) + LAYERID; + this->_BGLayer[layerID].extPaletteSlot = (BGnCNT.PaletteSet_Wrap * 2) + layerID; } else { - this->_BGLayer[LAYERID].isDisplayWrapped = (BGnCNT.PaletteSet_Wrap != 0); + this->_BGLayer[layerID].isDisplayWrapped = (BGnCNT.PaletteSet_Wrap != 0); } - this->_BGLayer[LAYERID].type = mode; - this->_BGLayer[LAYERID].size = GPUEngineBase::_BGLayerSizeLUT[mode][BGnCNT.ScreenSize]; - this->_BGLayer[LAYERID].isMosaic = (BGnCNT.Mosaic != 0); - this->_BGLayer[LAYERID].priority = BGnCNT.Priority; - this->_BGLayer[LAYERID].extPalette = (u16 **)&MMU.ExtPal[this->_engineID][this->_BGLayer[LAYERID].extPaletteSlot]; + this->_BGLayer[layerID].type = mode; + this->_BGLayer[layerID].size = GPUEngineBase::_BGLayerSizeLUT[mode][BGnCNT.ScreenSize]; + this->_BGLayer[layerID].isMosaic = (BGnCNT.Mosaic != 0); + this->_BGLayer[layerID].priority = BGnCNT.Priority; + this->_BGLayer[layerID].extPalette = (u16 **)&MMU.ExtPal[this->_engineID][this->_BGLayer[layerID].extPaletteSlot]; this->_ResortBGLayers(); } @@ -1521,15 +1592,15 @@ // ROUTINES FOR INSIDE / OUTSIDE WINDOW CHECKS /*****************************************************************************/ -template <GPULayerID LAYERID> -FORCEINLINE void GPUEngineBase::_RenderPixel_CheckWindows(const size_t srcX, bool &didPassWindowTest, bool &enableColorEffect) const +FORCEINLINE void GPUEngineBase::_RenderPixel_CheckWindows(const size_t srcX, const GPULayerID srcLayerID, bool &didPassWindowTest, bool &enableColorEffect) const { + didPassWindowTest = true; + enableColorEffect = true; + // If no windows are enabled, then we don't need to perform any window tests. // In this case, the pixel always passes and the color effect is always processed. 
if (!this->_isAnyWindowEnabled) { - didPassWindowTest = true; - enableColorEffect = true; return; } @@ -1539,19 +1610,8 @@ if (this->_curr_win[0][srcX] == 1) { //INFO("bg%i passed win0 : (%i %i) was within (%i %i)(%i %i)\n", bgnum, x, gpu->_currentScanline, gpu->WIN0H0, gpu->WIN0V0, gpu->WIN0H1, gpu->WIN0V1); - switch (LAYERID) - { - case GPULayerID_BG0: didPassWindowTest = (this->_IORegisterMap->WIN0IN.BG0_Enable != 0); break; - case GPULayerID_BG1: didPassWindowTest = (this->_IORegisterMap->WIN0IN.BG1_Enable != 0); break; - case GPULayerID_BG2: didPassWindowTest = (this->_IORegisterMap->WIN0IN.BG2_Enable != 0); break; - case GPULayerID_BG3: didPassWindowTest = (this->_IORegisterMap->WIN0IN.BG3_Enable != 0); break; - case GPULayerID_OBJ: didPassWindowTest = (this->_IORegisterMap->WIN0IN.OBJ_Enable != 0); break; - - default: - break; - } - - enableColorEffect = (this->_IORegisterMap->WIN0IN.Effect_Enable != 0); + didPassWindowTest = this->_WIN0_enable[srcLayerID]; + enableColorEffect = this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG]; return; } } @@ -1562,19 +1622,8 @@ if (this->_curr_win[1][srcX] == 1) { //INFO("bg%i passed win1 : (%i %i) was within (%i %i)(%i %i)\n", bgnum, x, gpu->_currentScanline, gpu->WIN1H0, gpu->WIN1V0, gpu->WIN1H1, gpu->WIN1V1); - switch (LAYERID) - { - case GPULayerID_BG0: didPassWindowTest = (this->_IORegisterMap->WIN1IN.BG0_Enable != 0); break; - case GPULayerID_BG1: didPassWindowTest = (this->_IORegisterMap->WIN1IN.BG1_Enable != 0); break; - case GPULayerID_BG2: didPassWindowTest = (this->_IORegisterMap->WIN1IN.BG2_Enable != 0); break; - case GPULayerID_BG3: didPassWindowTest = (this->_IORegisterMap->WIN1IN.BG3_Enable != 0); break; - case GPULayerID_OBJ: didPassWindowTest = (this->_IORegisterMap->WIN1IN.OBJ_Enable != 0); break; - - default: - break; - } - - enableColorEffect = (this->_IORegisterMap->WIN1IN.Effect_Enable != 0); + didPassWindowTest = this->_WIN1_enable[srcLayerID]; + enableColorEffect = 
this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG]; return; } } @@ -1584,61 +1633,33 @@ { if (this->_sprWin[srcX] == 1) { - switch (LAYERID) - { - case GPULayerID_BG0: didPassWindowTest = (this->_IORegisterMap->WINOBJ.BG0_Enable != 0); break; - case GPULayerID_BG1: didPassWindowTest = (this->_IORegisterMap->WINOBJ.BG1_Enable != 0); break; - case GPULayerID_BG2: didPassWindowTest = (this->_IORegisterMap->WINOBJ.BG2_Enable != 0); break; - case GPULayerID_BG3: didPassWindowTest = (this->_IORegisterMap->WINOBJ.BG3_Enable != 0); break; - case GPULayerID_OBJ: didPassWindowTest = (this->_IORegisterMap->WINOBJ.OBJ_Enable != 0); break; - - default: - break; - } - - enableColorEffect = (this->_IORegisterMap->WINOBJ.Effect_Enable != 0); + didPassWindowTest = this->_WINOBJ_enable[srcLayerID]; + enableColorEffect = this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG]; return; } } // If the pixel isn't inside any windows, then the pixel is outside, and therefore uses the WINOUT flags. // This has the lowest priority, and is always checked last. 
- switch (LAYERID) - { - case GPULayerID_BG0: didPassWindowTest = (this->_IORegisterMap->WINOUT.BG0_Enable != 0); break; - case GPULayerID_BG1: didPassWindowTest = (this->_IORegisterMap->WINOUT.BG1_Enable != 0); break; - case GPULayerID_BG2: didPassWindowTest = (this->_IORegisterMap->WINOUT.BG2_Enable != 0); break; - case GPULayerID_BG3: didPassWindowTest = (this->_IORegisterMap->WINOUT.BG3_Enable != 0); break; - case GPULayerID_OBJ: didPassWindowTest = (this->_IORegisterMap->WINOUT.OBJ_Enable != 0); break; - - default: - break; - } - - enableColorEffect = (this->_IORegisterMap->WINOUT.Effect_Enable != 0); + didPassWindowTest = this->_WINOUT_enable[srcLayerID]; + enableColorEffect = this->_WINOUT_enable[WINDOWCONTROL_EFFECTFLAG]; } #ifdef ENABLE_SSE2 -template <GPULayerID LAYERID, bool ISCUSTOMRENDERINGNEEDED> -FORCEINLINE void GPUEngineBase::_RenderPixel_CheckWindows16_SSE2(const size_t dstX, __m128i &didPassWindowTest, __m128i &enableColorEffect) const +template <bool ISCUSTOMRENDERINGNEEDED> +FORCEINLINE void GPUEngineBase::_RenderPixel_CheckWindows16_SSE2(const size_t dstX, const GPULayerID srcLayerID, __m128i &didPassWindowTest, __m128i &enableColorEffect) const { + didPassWindowTest = _mm_set1_epi8(0xFF); + enableColorEffect = _mm_set1_epi8(0xFF); + // If no windows are enabled, then we don't need to perform any window tests. // In this case, the pixel always passes and the color effect is always processed. if (!this->_isAnyWindowEnabled) { - didPassWindowTest = _mm_set1_epi8(0xFF); - enableColorEffect = _mm_set1_epi8(0xFF); return; } - // Since all comparisons are made against values of 1, we will use state values - // of 0 and 1 while doing window processing, and then convert to mask values - // 0x00 and 0xFF, respectively, once the processing is finished. 
- didPassWindowTest = _mm_set1_epi8(1); - enableColorEffect = _mm_set1_epi8(1); - - u8 didPassValue; __m128i win_vec128; __m128i win0HandledMask = _mm_setzero_si128(); @@ -1654,19 +1675,6 @@ { if (this->_isWindowInsideVerticalRange[0]) { - switch (LAYERID) - { - case GPULayerID_BG0: didPassValue = this->_IORegisterMap->WIN0IN.BG0_Enable; break; - case GPULayerID_BG1: didPassValue = this->_IORegisterMap->WIN0IN.BG1_Enable; break; - case GPULayerID_BG2: didPassValue = this->_IORegisterMap->WIN0IN.BG2_Enable; break; - case GPULayerID_BG3: didPassValue = this->_IORegisterMap->WIN0IN.BG3_Enable; break; - case GPULayerID_OBJ: didPassValue = this->_IORegisterMap->WIN0IN.OBJ_Enable; break; - - default: - didPassValue = 1; - break; - } - if (ISCUSTOMRENDERINGNEEDED) { if (this->_windowLeftCustom[0] > this->_windowRightCustom[0]) @@ -1689,8 +1697,8 @@ win0HandledMask = _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1)); } - didPassWindowTest = _mm_and_si128(win0HandledMask, _mm_set1_epi8(didPassValue)); - enableColorEffect = _mm_and_si128(win0HandledMask, _mm_set1_epi8(this->_IORegisterMap->WIN0IN.Effect_Enable)); + didPassWindowTest = _mm_and_si128(win0HandledMask, this->_WIN0_enable_SSE2[srcLayerID]); + enableColorEffect = _mm_and_si128(win0HandledMask, this->_WIN0_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]); } else { @@ -1702,19 +1710,6 @@ // Window 1 has medium priority, and is checked after Window 0. 
if (this->_WIN1_ENABLED && this->_isWindowInsideVerticalRange[1]) { - switch (LAYERID) - { - case GPULayerID_BG0: didPassValue = this->_IORegisterMap->WIN1IN.BG0_Enable; break; - case GPULayerID_BG1: didPassValue = this->_IORegisterMap->WIN1IN.BG1_Enable; break; - case GPULayerID_BG2: didPassValue = this->_IORegisterMap->WIN1IN.BG2_Enable; break; - case GPULayerID_BG3: didPassValue = this->_IORegisterMap->WIN1IN.BG3_Enable; break; - case GPULayerID_OBJ: didPassValue = this->_IORegisterMap->WIN1IN.OBJ_Enable; break; - - default: - didPassValue = 1; - break; - } - if (ISCUSTOMRENDERINGNEEDED) { if (this->_windowLeftCustom[1] > this->_windowRightCustom[1]) @@ -1737,26 +1732,13 @@ win1HandledMask = _mm_andnot_si128(win0HandledMask, _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1))); } - didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(win1HandledMask, _mm_set1_epi8(didPassValue)) ); - enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(win1HandledMask, _mm_set1_epi8(this->_IORegisterMap->WIN1IN.Effect_Enable)) ); + didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(win1HandledMask, this->_WIN1_enable_SSE2[srcLayerID]) ); + enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(win1HandledMask, this->_WIN1_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); } // Window OBJ has low priority, and is checked after both Window 0 and Window 1. 
if (this->_WINOBJ_ENABLED) { - switch (LAYERID) - { - case GPULayerID_BG0: didPassValue = this->_IORegisterMap->WINOBJ.BG0_Enable; break; - case GPULayerID_BG1: didPassValue = this->_IORegisterMap->WINOBJ.BG1_Enable; break; - case GPULayerID_BG2: didPassValue = this->_IORegisterMap->WINOBJ.BG2_Enable; break; - case GPULayerID_BG3: didPassValue = this->_IORegisterMap->WINOBJ.BG3_Enable; break; - case GPULayerID_OBJ: didPassValue = this->_IORegisterMap->WINOBJ.OBJ_Enable; break; - - default: - didPassValue = 1; - break; - } - if (ISCUSTOMRENDERINGNEEDED) { win_vec128 = _mm_set_epi8(this->_sprWin[_gpuDstToSrcIndex[dstX+15]], @@ -1782,32 +1764,15 @@ } winOBJHandledMask = _mm_andnot_si128( _mm_or_si128(win0HandledMask, win1HandledMask), _mm_cmpeq_epi8(win_vec128, _mm_set1_epi8(1)) ); - didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(winOBJHandledMask, _mm_set1_epi8(didPassValue)) ); - enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(winOBJHandledMask, _mm_set1_epi8(this->_IORegisterMap->WINOBJ.Effect_Enable)) ); + didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(winOBJHandledMask, this->_WINOBJ_enable_SSE2[srcLayerID]) ); + enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(winOBJHandledMask, this->_WINOBJ_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); } // If the pixel isn't inside any windows, then the pixel is outside, and therefore uses the WINOUT flags. // This has the lowest priority, and is always checked last. 
- switch (LAYERID) - { - case GPULayerID_BG0: didPassValue = this->_IORegisterMap->WINOUT.BG0_Enable; break; - case GPULayerID_BG1: didPassValue = this->_IORegisterMap->WINOUT.BG1_Enable; break; - case GPULayerID_BG2: didPassValue = this->_IORegisterMap->WINOUT.BG2_Enable; break; - case GPULayerID_BG3: didPassValue = this->_IORegisterMap->WINOUT.BG3_Enable; break; - case GPULayerID_OBJ: didPassValue = this->_IORegisterMap->WINOUT.OBJ_Enable; break; - - default: - break; - } - winOUTHandledMask = _mm_xor_si128( _mm_or_si128(win0HandledMask, _mm_or_si128(win1HandledMask, winOBJHandledMask)), _mm_set1_epi32(0xFFFFFFFF) ); - didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(winOUTHandledMask, _mm_set1_epi8(didPassValue)) ); - enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(winOUTHandledMask, _mm_set1_epi8(this->_IORegisterMap->WINOUT.Effect_Enable)) ); - - // Now that we've finished processing, convert the values of 0 and 1 back into - // the mask values of 0x00 and 0xFF, respectively. 
- didPassWindowTest = _mm_cmpeq_epi8(didPassWindowTest, _mm_set1_epi8(1)); - enableColorEffect = _mm_cmpeq_epi8(enableColorEffect, _mm_set1_epi8(1)); + didPassWindowTest = _mm_or_si128( didPassWindowTest, _mm_and_si128(winOUTHandledMask, this->_WINOUT_enable_SSE2[srcLayerID]) ); + enableColorEffect = _mm_or_si128( enableColorEffect, _mm_and_si128(winOUTHandledMask, this->_WINOUT_enable_SSE2[WINDOWCONTROL_EFFECTFLAG]) ); } #endif @@ -1815,8 +1780,8 @@ /*****************************************************************************/ // PIXEL RENDERING /*****************************************************************************/ -template <NDSColorFormat OUTPUTFORMAT, GPULayerID LAYERID, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> -FORCEINLINE void GPUEngineBase::_RenderPixel(const size_t srcX, const u16 src, const u8 srcAlpha, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine) +template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> +FORCEINLINE void GPUEngineBase::_RenderPixel(const size_t srcX, const u16 srcColor, const u8 srcAlpha, const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine) { if (ISDEBUGRENDER) { @@ -1825,19 +1790,19 @@ switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - *(u16 *)dstColorLine = src | 0x8000; + *(u16 *)dstColorLine = srcColor | 0x8000; break; case NDSColorFormat_BGR666_Rev: - (*(FragmentColor *)dstColorLine).color = ConvertColor555To6665Opaque<false>(src); + (*(FragmentColor *)dstColorLine).color = ConvertColor555To6665Opaque<false>(srcColor); break; case NDSColorFormat_BGR888_Rev: - (*(FragmentColor *)dstColorLine).color = ConvertColor555To8888Opaque<false>(src); + (*(FragmentColor *)dstColorLine).color = ConvertColor555To8888Opaque<false>(srcColor); break; } - *dstLayerIDLine = LAYERID; + *dstLayerIDLine = srcLayerID; return; } @@ -1845,8 +1810,8 @@ if 
(!NOWINDOWSENABLEDHINT) { - bool didPassWindowTest = true; - this->_RenderPixel_CheckWindows<LAYERID>(srcX, didPassWindowTest, enableColorEffect); + bool didPassWindowTest; + this->_RenderPixel_CheckWindows(srcX, srcLayerID, didPassWindowTest, enableColorEffect); if (!didPassWindowTest) { @@ -1854,24 +1819,24 @@ } } - if ((LAYERID != GPULayerID_OBJ) && COLOREFFECTDISABLEDHINT) + if (!ISSRCLAYEROBJ && COLOREFFECTDISABLEDHINT) { switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - *(u16 *)dstColorLine = src | 0x8000; + *(u16 *)dstColorLine = srcColor | 0x8000; break; case NDSColorFormat_BGR666_Rev: - (*(FragmentColor *)dstColorLine).color = ConvertColor555To6665Opaque<false>(src); + (*(FragmentColor *)dstColorLine).color = ConvertColor555To6665Opaque<false>(srcColor); break; case NDSColorFormat_BGR888_Rev: - (*(FragmentColor *)dstColorLine).color = ConvertColor555To8888Opaque<false>(src); + (*(FragmentColor *)dstColorLine).color = ConvertColor555To8888Opaque<false>(srcColor); break; } - *dstLayerIDLine = LAYERID; + *dstLayerIDLine = srcLayerID; return; } @@ -1884,39 +1849,12 @@ { const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; const GPULayerID dstLayerID = (GPULayerID)*dstLayerIDLine; - bool srcEffectEnable = false; - const bool dstEffectEnable = (dstLayerID != LAYERID) && this->_blend2[dstLayerID]; + const bool dstEffectEnable = (dstLayerID != srcLayerID) && this->_dstBlendEnable[dstLayerID]; - switch (LAYERID) - { - case GPULayerID_BG0: - srcEffectEnable = (BLDCNT.BG0_Target1 != 0); - break; - - case GPULayerID_BG1: - srcEffectEnable = (BLDCNT.BG1_Target1 != 0); - break; - - case GPULayerID_BG2: - srcEffectEnable = (BLDCNT.BG2_Target1 != 0); - break; - - case GPULayerID_BG3: - srcEffectEnable = (BLDCNT.BG3_Target1 != 0); - break; - - case GPULayerID_OBJ: - srcEffectEnable = (BLDCNT.OBJ_Target1 != 0); - break; - - default: - break; - } - // Select the color effect based on the BLDCNT target flags. 
bool forceBlendEffect = false; - if (LAYERID == GPULayerID_OBJ) + if (ISSRCLAYEROBJ) { //translucent-capable OBJ are forcing the function to blend when the second target is satisfied const OBJMode objMode = (OBJMode)this->_sprType[srcX]; @@ -1940,7 +1878,7 @@ { selectedEffect = ColorEffect_Blend; } - else if (srcEffectEnable) + else if (this->_srcBlendEnable[srcLayerID]) { switch ((ColorEffect)BLDCNT.ColorEffect) { @@ -1972,7 +1910,7 @@ { case ColorEffect_Disable: { - finalDstColor16 = src; + finalDstColor16 = srcColor; switch (OUTPUTFORMAT) { @@ -1993,7 +1931,7 @@ case ColorEffect_IncreaseBrightness: { - finalDstColor16 = this->_ColorEffectIncreaseBrightness(src & 0x7FFF); + finalDstColor16 = this->_ColorEffectIncreaseBrightness(srcColor & 0x7FFF); switch (OUTPUTFORMAT) { @@ -2014,7 +1952,7 @@ case ColorEffect_DecreaseBrightness: { - finalDstColor16 = this->_ColorEffectDecreaseBrightness(src & 0x7FFF); + finalDstColor16 = this->_ColorEffectDecreaseBrightness(srcColor & 0x7FFF); switch (OUTPUTFORMAT) { @@ -2038,18 +1976,18 @@ switch (OUTPUTFORMAT) { case NDSColorFormat_BGR555_Rev: - finalDstColor16 = this->_ColorEffectBlend(src, *(u16 *)dstColorLine, selectedBlendTable); + finalDstColor16 = this->_ColorEffectBlend(srcColor, *(u16 *)dstColorLine, selectedBlendTable); finalDstColor16 |= 0x8000; break; case NDSColorFormat_BGR666_Rev: - finalDstColor32.color = ConvertColor555To6665Opaque<false>(src); + finalDstColor32.color = ConvertColor555To6665Opaque<false>(srcColor); finalDstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(finalDstColor32, *(FragmentColor *)dstColorLine, blendEVA, blendEVB); finalDstColor32.a = 0x1F; break; case NDSColorFormat_BGR888_Rev: - finalDstColor32.color = ConvertColor555To8888Opaque<false>(src); + finalDstColor32.color = ConvertColor555To8888Opaque<false>(srcColor); finalDstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(finalDstColor32, *(FragmentColor *)dstColorLine, blendEVA, blendEVB); finalDstColor32.a = 0xFF; break; @@ -2070,14 
+2008,15 @@ break; } - *dstLayerIDLine = LAYERID; + *dstLayerIDLine = srcLayerID; } #ifdef ENABLE_SSE2 -template <NDSColorFormat OUTPUTFORMAT, GPULayerID LAYERID, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +template <NDSColorFormat OUTPUTFORMAT, bool ISSRCLAYEROBJ, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(const size_t dstX, const ColorEffect colorEffect, + const GPULayerID srcLayerID, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, const __m128i &srcAlpha, const __m128i &srcEffectEnableMask, @@ -2085,6 +2024,7 @@ __m128i &dstLayerID, __m128i &passMask8) { + const __m128i srcLayerID_vec128 = _mm_set1_epi8(srcLayerID); __m128i passMask16[2] = { _mm_unpacklo_epi8(passMask8, passMask8), _mm_unpackhi_epi8(passMask8, passMask8) }; @@ -2112,7 +2052,7 @@ dst3 = _mm_blendv_epi8(dst3, _mm_or_si128(src3, alphaBits), passMask32[3]); } - dstLayerID = _mm_blendv_epi8(dstLayerID, _mm_set1_epi8(LAYERID), passMask8); + dstLayerID = _mm_blendv_epi8(dstLayerID, srcLayerID_vec128, passMask8); return; } @@ -2122,14 +2062,14 @@ { // Do the window test. 
__m128i didPassWindowTest; - this->_RenderPixel_CheckWindows16_SSE2<LAYERID, ISCUSTOMRENDERINGNEEDED>(dstX, didPassWindowTest, enableColorEffectMask); + this->_RenderPixel_CheckWindows16_SSE2<ISCUSTOMRENDERINGNEEDED>(dstX, srcLayerID, didPassWindowTest, enableColorEffectMask); passMask8 = _mm_and_si128(passMask8, didPassWindowTest); passMask16[0] = _mm_unpacklo_epi8(passMask8, passMask8); passMask16[1] = _mm_unpackhi_epi8(passMask8, passMask8); } - if ( ((LAYERID != GPULayerID_OBJ) && COLOREFFECTDISABLEDHINT) || (_mm_movemask_epi8(srcEffectEnableMask) == 0) ) + if ( (!ISSRCLAYEROBJ && COLOREFFECTDISABLEDHINT) || (_mm_movemask_epi8(srcEffectEnableMask) == 0) ) { if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) { @@ -2146,25 +2086,25 @@ dst3 = _mm_blendv_epi8(dst3, _mm_or_si128(src3, alphaBits), passMask32[3]); } - dstLayerID = _mm_blendv_epi8(dstLayerID, _mm_set1_epi8(LAYERID), passMask8); + dstLayerID = _mm_blendv_epi8(dstLayerID, srcLayerID_vec128, passMask8); return; } __m128i dstEffectEnableMask; #ifdef ENABLE_SSSE3 - dstEffectEnableMask = _mm_shuffle_epi8(this->_blend2_SSSE3, dstLayerID); + dstEffectEnableMask = _mm_shuffle_epi8(this->_dstBlendEnable_SSSE3, dstLayerID); dstEffectEnableMask = _mm_xor_si128( _mm_cmpeq_epi8(dstEffectEnableMask, _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF) ); #else - dstEffectEnableMask = _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), this->_blend2_SSE2[GPULayerID_BG0]); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG1)), this->_blend2_SSE2[GPULayerID_BG1]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG2)), this->_blend2_SSE2[GPULayerID_BG2]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG3)), this->_blend2_SSE2[GPULayerID_BG3]) ); - dstEffectEnableMask = 
_mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_OBJ)), this->_blend2_SSE2[GPULayerID_OBJ]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_Backdrop)), this->_blend2_SSE2[GPULayerID_Backdrop]) ); + dstEffectEnableMask = _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), this->_dstBlendEnable_SSE2[GPULayerID_BG0]); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG1)), this->_dstBlendEnable_SSE2[GPULayerID_BG1]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG2)), this->_dstBlendEnable_SSE2[GPULayerID_BG2]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG3)), this->_dstBlendEnable_SSE2[GPULayerID_BG3]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_OBJ)), this->_dstBlendEnable_SSE2[GPULayerID_OBJ]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_Backdrop)), this->_dstBlendEnable_SSE2[GPULayerID_Backdrop]) ); #endif - dstEffectEnableMask = _mm_andnot_si128( _mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(LAYERID)), dstEffectEnableMask ); + dstEffectEnableMask = _mm_andnot_si128( _mm_cmpeq_epi8(dstLayerID, srcLayerID_vec128), dstEffectEnableMask ); // Select the color effect based on the BLDCNT target flags. 
__m128i forceBlendEffectMask = _mm_setzero_si128(); @@ -2174,7 +2114,7 @@ __m128i evb_vec128 = _mm_set1_epi16(this->_BLDALPHA_EVB); const __m128i evy_vec128 = _mm_set1_epi16(this->_BLDALPHA_EVY); - if (LAYERID == GPULayerID_OBJ) + if (ISSRCLAYEROBJ) { const __m128i objMode_vec128 = _mm_loadu_si128((__m128i *)(this->_sprType + dstX)); const __m128i isObjTranslucentMask = _mm_and_si128( dstEffectEnableMask, _mm_or_si128(_mm_cmpeq_epi8(objMode_vec128, _mm_set1_epi8(OBJMode_Transparent)), _mm_cmpeq_epi8(objMode_vec128, _mm_set1_epi8(OBJMode_Bitmap))) ); @@ -2291,7 +2231,7 @@ dst3 = _mm_blendv_epi8(dst3, tmpSrc[3], passMask32[3]); } - dstLayerID = _mm_blendv_epi8(dstLayerID, _mm_set1_epi8(LAYERID), passMask8); + dstLayerID = _mm_blendv_epi8(dstLayerID, srcLayerID_vec128, passMask8); } #endif @@ -2307,8 +2247,7 @@ if (enableColorEffect) { const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; - const bool srcEffectEnable = (BLDCNT.BG0_Target1 != 0); - const bool dstEffectEnable = (dstLayerID != GPULayerID_BG0) && this->_blend2[dstLayerID]; + const bool dstEffectEnable = (dstLayerID != GPULayerID_BG0) && this->_dstBlendEnable[dstLayerID]; // Select the color effect based on the BLDCNT target flags. bool forceBlendEffect = false; @@ -2322,7 +2261,7 @@ { selectedEffect = ColorEffect_Blend; } - else if (srcEffectEnable) + else if (this->_srcBlendEnable[GPULayerID_BG0]) { switch ((ColorEffect)BLDCNT.ColorEffect) { @@ -2379,8 +2318,7 @@ if (enableColorEffect) { const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; - const bool srcEffectEnable = (BLDCNT.BG0_Target1 != 0); - const bool dstEffectEnable = (dstLayerID != GPULayerID_BG0) && this->_blend2[dstLayerID]; + const bool dstEffectEnable = (dstLayerID != GPULayerID_BG0) && this->_dstBlendEnable[dstLayerID]; // Select the color effect based on the BLDCNT target flags. 
bool forceBlendEffect = false; @@ -2394,7 +2332,7 @@ { selectedEffect = ColorEffect_Blend; } - else if (srcEffectEnable) + else if (this->_srcBlendEnable[GPULayerID_BG0]) { switch ((ColorEffect)BLDCNT.ColorEffect) { @@ -2495,19 +2433,19 @@ } const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; - const __m128i srcEffectEnableMask = _mm_cmpeq_epi8(_mm_set1_epi8(BLDCNT.BG0_Target1), _mm_set1_epi8(1)); + const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[GPULayerID_BG0]; __m128i dstEffectEnableMask; #ifdef ENABLE_SSSE3 - dstEffectEnableMask = _mm_shuffle_epi8(this->_blend2_SSSE3, dstLayerID); + dstEffectEnableMask = _mm_shuffle_epi8(this->_dstBlendEnable_SSSE3, dstLayerID); dstEffectEnableMask = _mm_xor_si128( _mm_cmpeq_epi8(dstEffectEnableMask, _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF) ); #else - dstEffectEnableMask = _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), this->_blend2_SSE2[GPULayerID_BG0]); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG1)), this->_blend2_SSE2[GPULayerID_BG1]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG2)), this->_blend2_SSE2[GPULayerID_BG2]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG3)), this->_blend2_SSE2[GPULayerID_BG3]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_OBJ)), this->_blend2_SSE2[GPULayerID_OBJ]) ); - dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_Backdrop)), this->_blend2_SSE2[GPULayerID_Backdrop]) ); + dstEffectEnableMask = _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), this->_dstBlendEnable_SSE2[GPULayerID_BG0]); + dstEffectEnableMask = 
_mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG1)), this->_dstBlendEnable_SSE2[GPULayerID_BG1]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG2)), this->_dstBlendEnable_SSE2[GPULayerID_BG2]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG3)), this->_dstBlendEnable_SSE2[GPULayerID_BG3]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_OBJ)), this->_dstBlendEnable_SSE2[GPULayerID_OBJ]) ); + dstEffectEnableMask = _mm_or_si128(dstEffectEnableMask, _mm_and_si128(_mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_Backdrop)), this->_dstBlendEnable_SSE2[GPULayerID_Backdrop]) ); #endif dstEffectEnableMask = _mm_andnot_si128( _mm_cmpeq_epi8(dstLayerID, _mm_set1_epi8(GPULayerID_BG0)), dstEffectEnableMask ); @@ -2668,14 +2606,14 @@ } } -template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> -void GPUEngineBase::_RenderPixelIterate_Final(u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) +template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> +void GPUEngineBase::_RenderPixelIterate_Final(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { - const u16 lineWidth = (ISDEBUGRENDER) ? this->_BGLayer[LAYERID].size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; + const u16 lineWidth = (ISDEBUGRENDER) ? 
this->_BGLayer[srcLayerID].size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; const s16 dx = (s16)LOCAL_TO_LE_16(param.BGnPA.value); const s16 dy = (s16)LOCAL_TO_LE_16(param.BGnPC.value); - const s32 wh = this->_BGLayer[LAYERID].size.width; - const s32 ht = this->_BGLayer[LAYERID].size.height; + const s32 wh = this->_BGLayer[srcLayerID].size.width; + const s32 ht = this->_BGLayer[srcLayerID].size.height; const s32 wmask = wh - 1; const s32 hmask = ht - 1; @@ -2715,7 +2653,7 @@ } else { - this->_RenderPixelSingle<LAYERID, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, i, (index != 0)); + this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, i, (index != 0)); } auxX++; @@ -2746,28 +2684,28 @@ } else { - this->_RenderPixelSingle<LAYERID, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, i, (index != 0)); + this->_RenderPixelSingle<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcLayerID, dstColorLine, this->_renderLineLayerIDNative, lineIndex, color, i, (index != 0)); } } } } -template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> -void GPUEngineBase::_RenderPixelIterate_ApplyWrap(u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) +template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc, bool WRAP> +void GPUEngineBase::_RenderPixelIterate_ApplyWrap(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter 
&param, const u32 map, const u32 tile, const u16 *__restrict pal) { - this->_RenderPixelIterate_Final<LAYERID, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, WRAP>(dstColorLine, lineIndex, param, map, tile, pal); + this->_RenderPixelIterate_Final<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, WRAP>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); } -template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> -void GPUEngineBase::_RenderPixelIterate(u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) +template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED, PixelLookupFunc GetPixelFunc> +void GPUEngineBase::_RenderPixelIterate(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 lineIndex, const IOREG_BGnParameter &param, const u32 map, const u32 tile, const u16 *__restrict pal) { - if (this->_BGLayer[LAYERID].isDisplayWrapped) + if (this->_BGLayer[srcLayerID].isDisplayWrapped) { - this->_RenderPixelIterate_ApplyWrap<LAYERID, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, true>(dstColorLine, lineIndex, param, map, tile, pal); + this->_RenderPixelIterate_ApplyWrap<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, true>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); } else { - this->_RenderPixelIterate_ApplyWrap<LAYERID, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, false>(dstColorLine, lineIndex, param, map, tile, pal); + 
this->_RenderPixelIterate_ApplyWrap<ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, GetPixelFunc, false>(srcLayerID, dstColorLine, lineIndex, param, map, tile, pal); } } @@ -2783,8 +2721,8 @@ return theTileEntry; } -template <GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> -FORCEINLINE void GPUEngineBase::_RenderPixelSingle(void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex, u16 color, const size_t srcX, const bool opaque) +template <bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> +FORCEINLINE void GPUEngineBase::_RenderPixelSingle(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex, u16 color, const size_t srcX, const bool opaque) { bool willRenderColor = opaque; @@ -2798,10 +2736,10 @@ if (!this->_mosaicWidthBG[srcX].begin || !this->_mosaicHeightBG[lineIndex].begin) { - color = this->_mosaicColors.bg[LAYERID][this->_mosaicWidthBG[srcX].trunc]; + color = this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[srcX].trunc]; } - this->_mosaicColors.bg[LAYERID][srcX] = color; + this->_mosaicColors.bg[srcLayerID][srcX] = color; willRenderColor = (color != 0xFFFF); } @@ -2809,16 +2747,17 @@ if (willRenderColor) { // TODO: This should flag a warning. Fix this when we get proper color format support. - this->_RenderPixel<NDSColorFormat_BGR555_Rev, LAYERID, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcX, - color, - 0, - (NDSColorFormat_BGR555_Rev == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + srcX) : (void *)((FragmentColor *)dstColorLine + srcX), - dstLayerID + srcX); + this->_RenderPixel<NDSColorFormat_BGR555_Rev, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(srcX, + color, + 0, + srcLayerID, + (NDSColorFormat_BGR555_Rev == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLine + srcX) : (void *)((FragmentColor *)dstColorLine + srcX), + dstLayerID + srcX); } } -template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> -void GPUEngineBase::_RenderPixelsCustom(void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex) +template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> +void GPUEngineBase::_RenderPixelsCustom(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex) { const size_t lineWidth = GPU->GetDisplayInfo().customWidth; @@ -2843,16 +2782,16 @@ const __m128i mosaicHeightMask = _mm_cmpeq_epi16(_mm_set1_epi16(this->_mosaicHeightBG[lineIndex].begin), _mm_setzero_si128()); const __m128i mosaicMask = _mm_or_si128(mosaicWidthMask, mosaicHeightMask); - this->_mosaicColors.bg[LAYERID][x+0] = (_mm_extract_epi16(mosaicMask, 0) != 0) ? this->_mosaicColors.bg[LAYERID][this->_mosaicWidthBG[x+0].trunc] : _mm_extract_epi16(tmpColor_vec128, 0); - this->_mosaicColors.bg[LAYERID][x+1] = (_mm_extract_epi16(mosaicMask, 1) != 0) ? this->_mosaicColors.bg[LAYERID][this->_mosaicWidthBG[x+1].trunc] : _mm_extract_epi16(tmpColor_vec128, 1); - this->_mosaicColors.bg[LAYERID][x+2] = (_mm_extract_epi16(mosaicMask, 2) != 0) ? this->_mosaicColors.bg[LAYERID][this->_mosaicWidthBG[x+2].trunc] : _mm_extract_epi16(tmpColor_vec128, 2); - this->_mosaicColors.bg[LAYERID][x+3] = (_mm_extract_epi16(mosaicMask, 3) != 0) ? this->_mosaicColors.bg[LAYERID][this->_mosaicWidthBG[x+3].trunc] : _mm_extract_epi16(tmpColor_vec128, 3); - this->_mosaicColors.bg[LAYERID][x+4] = (_mm_extract_epi16(mosaicMask, 4) != 0) ? this->_mosaicColors.bg[LAYERID][this->_mosaicWidthBG[x+4].trunc] : _mm_extract_epi16(tmpColor_vec128, 4); - this->_mosaicColors.bg[LAYERID][x+5] = (_mm_extract_epi16(mosaicMask, 5) != 0) ? 
this->_mosaicColors.bg[LAYERID][this->_mosaicWidthBG[x+5].trunc] : _mm_extract_epi16(tmpColor_vec128, 5); - this->_mosaicColors.bg[LAYERID][x+6] = (_mm_extract_epi16(mosaicMask, 6) != 0) ? this->_mosaicColors.bg[LAYERID][this->_mosaicWidthBG[x+6].trunc] : _mm_extract_epi16(tmpColor_vec128, 6); - this->_mosaicColors.bg[LAYERID][x+7] = (_mm_extract_epi16(mosaicMask, 7) != 0) ? this->_mosaicColors.bg[LAYERID][this->_mosaicWidthBG[x+7].trunc] : _mm_extract_epi16(tmpColor_vec128, 7); + this->_mosaicColors.bg[srcLayerID][x+0] = (_mm_extract_epi16(mosaicMask, 0) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+0].trunc] : _mm_extract_epi16(tmpColor_vec128, 0); + this->_mosaicColors.bg[srcLayerID][x+1] = (_mm_extract_epi16(mosaicMask, 1) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+1].trunc] : _mm_extract_epi16(tmpColor_vec128, 1); + this->_mosaicColors.bg[srcLayerID][x+2] = (_mm_extract_epi16(mosaicMask, 2) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+2].trunc] : _mm_extract_epi16(tmpColor_vec128, 2); + this->_mosaicColors.bg[srcLayerID][x+3] = (_mm_extract_epi16(mosaicMask, 3) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+3].trunc] : _mm_extract_epi16(tmpColor_vec128, 3); + this->_mosaicColors.bg[srcLayerID][x+4] = (_mm_extract_epi16(mosaicMask, 4) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+4].trunc] : _mm_extract_epi16(tmpColor_vec128, 4); + this->_mosaicColors.bg[srcLayerID][x+5] = (_mm_extract_epi16(mosaicMask, 5) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+5].trunc] : _mm_extract_epi16(tmpColor_vec128, 5); + this->_mosaicColors.bg[srcLayerID][x+6] = (_mm_extract_epi16(mosaicMask, 6) != 0) ? this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+6].trunc] : _mm_extract_epi16(tmpColor_vec128, 6); + this->_mosaicColors.bg[srcLayerID][x+7] = (_mm_extract_epi16(mosaicMask, 7) != 0) ? 
this->_mosaicColors.bg[srcLayerID][this->_mosaicWidthBG[x+7].trunc] : _mm_extract_epi16(tmpColor_vec128, 7); - const __m128i mosaicColor_vec128 = _mm_loadu_si128((__m128i *)(this->_mosaicColors.bg[LAYERID] + x)); + const __m128i mosaicColor_vec128 = _mm_loadu_si128((__m128i *)(this->_mosaicColors.bg[srcLayerID] + x)); const __m128i mosaicColorMask = _mm_cmpeq_epi16(mosaicColor_vec128, _mm_set1_epi16(0xFFFF)); _mm_storel_epi64( (__m128i *)(this->_bgLayerIndex + x), _mm_andnot_si128(_mm_packs_epi16(mosaicColorMask, _mm_setzero_si128()), index_vec128) ); _mm_store_si128( (__m128i *)(this->_bgLayerColor + x), _mm_blendv_epi8(mosaicColor_vec128, col_vec128, mosaicColorMask) ); @@ -2925,38 +2864,8 @@ #ifdef ENABLE_SSE2 const size_t ssePixCount = (lineWidth - (lineWidth % 16)); - const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; - u8 srcEffectEnableValue; - - switch (LAYERID) - { - case GPULayerID_BG0: - srcEffectEnableValue = BLDCNT.BG0_Target1; - break; - - case GPULayerID_BG1: - srcEffectEnableValue = BLDCNT.BG1_Target1; - break; - - case GPULayerID_BG2: - srcEffectEnableValue = BLDCNT.BG2_Target1; - break; - - case GPULayerID_BG3: - srcEffectEnableValue = BLDCNT.BG3_Target1; - break; - - case GPULayerID_OBJ: - srcEffectEnableValue = BLDCNT.OBJ_Target1; - break; - - default: - srcEffectEnableValue = 0; - break; - } - - const __m128i srcEffectEnableMask = _mm_cmpeq_epi8(_mm_set1_epi8(srcEffectEnableValue), _mm_set1_epi8(1)); + const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[srcLayerID]; #endif for (size_t l = 0; l < lineCount; l++) @@ -3012,14 +2921,15 @@ dst[3] = _mm_load_si128((__m128i *)dstColorLine + 3); } - this->_RenderPixel16_SSE2<NDSColorFormat_BGR555_Rev, LAYERID, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(i, - (ColorEffect)BLDCNT.ColorEffect, - src[3], src[2], src[1], src[0], - srcAlpha, - srcEffectEnableMask, - dst[3], dst[2], dst[1], dst[0], - dstLayerID_vec128, - passMask8); + 
this->_RenderPixel16_SSE2<NDSColorFormat_BGR555_Rev, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(i, + (ColorEffect)BLDCNT.ColorEffect, + srcLayerID, + src[3], src[2], src[1], src[0], + srcAlpha, + srcEffectEnableMask, + dst[3], dst[2], dst[1], dst[0], + dstLayerID_vec128, + passMask8); _mm_store_si128((__m128i *)dstColorLine + 0, dst[0]); _mm_store_si128((__m128i *)dstColorLine + 1, dst[1]); @@ -3043,59 +2953,31 @@ continue; } - this->_RenderPixel<NDSColorFormat_BGR555_Rev, LAYERID, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(_gpuDstToSrcIndex[i], - this->_bgLayerColorCustom[i], - 0, - dstColorLine, - dstLayerID); + this->_RenderPixel<NDSColorFormat_BGR555_Rev, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(_gpuDstToSrcIndex[i], + this->_bgLayerColorCustom[i], + 0, + srcLayerID, + dstColorLine, + dstLayerID); } } } -template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> -void GPUEngineBase::_RenderPixelsCustomVRAM(void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex) +template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> +void GPUEngineBase::_RenderPixelsCustomVRAM(const GPULayerID srcLayerID, void *__restrict dstColorLine, u8 *__restrict dstLayerID, const size_t lineIndex) { const NDSColorFormat outputFormat = GPU->GetDisplayInfo().colorFormat; const size_t lineWidth = GPU->GetDisplayInfo().customWidth; const size_t lineCount = _gpuDstLineCount[lineIndex]; const size_t dstPixCount = lineWidth * lineCount; - const u16 *__restrict srcLine = GPU->GetCustomVRAMAddressUsingMappedAddress(this->_BGLayer[LAYERID].BMPAddress) + (_gpuDstLineIndex[lineIndex] * lineWidth); + const u16 *__restrict srcLine = GPU->GetCustomVRAMAddressUsingMappedAddress(this->_BGLayer[srcLayerID].BMPAddress) + (_gpuDstLineIndex[lineIndex] * lineWidth); size_t i = 0; #ifdef ENABLE_SSE2 const 
IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT; - u8 srcEffectEnableValue; + const __m128i srcEffectEnableMask = this->_srcBlendEnable_SSE2[srcLayerID]; - switch (LAYERID) - { - case GPULayerID_BG0: - srcEffectEnableValue = BLDCNT.BG0_Target1; - break; - - case GPULayerID_BG1: - srcEffectEnableValue = BLDCNT.BG1_Target1; - break; - - case GPULayerID_BG2: - srcEffectEnableValue = BLDCNT.BG2_Target1; - break; - - case GPULayerID_BG3: - srcEffectEnableValue = BLDCNT.BG3_Target1; - break; - - case GPULayerID_OBJ: - srcEffectEnableValue = BLDCNT.OBJ_Target1; - break; - - default: - srcEffectEnableValue = 0; - break; - } - - const __m128i srcEffectEnableMask = _mm_cmpeq_epi8(_mm_set1_epi8(srcEffectEnableValue), _mm_set1_epi8(1)); - const size_t ssePixCount = (dstPixCount - (dstPixCount % 16)); for (; i < ssePixCount; i+=16, dstLayerID+=16, dstColorLine = (outputFormat == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLine + 16) : (void *)((FragmentColor *)dstColorLine + 16)) { @@ -3145,14 +3027,15 @@ dst[3] = _mm_load_si128((__m128i *)dstColorLine + 3); } - this->_RenderPixel16_SSE2<NDSColorFormat_BGR555_Rev, LAYERID, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(i, - (ColorEffect)BLDCNT.ColorEffect, - src[3], src[2], src[1], src[0], - srcAlpha, - srcEffectEnableMask, - dst[3], dst[2], dst[1], dst[0], - dstLayerID_vec128, - passMask8); + this->_RenderPixel16_SSE2<NDSColorFormat_BGR555_Rev, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, true>(i, + (ColorEffect)BLDCNT.ColorEffect, + srcLayerID, + src[3], src[2], src[1], src[0], + srcAlpha, + srcEffectEnableMask, + dst[3], dst[2], dst[1], dst[0], + dstLayerID_vec128, + passMask8); _mm_store_si128((__m128i *)dstColorLine + 0, dst[0]); _mm_store_si128((__m128i *)dstColorLine + 1, dst[1]); @@ -3176,11 +3059,12 @@ continue; } - this->_RenderPixel<NDSColorFormat_BGR555_Rev, LAYERID, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(_gpuDstToSrcIndex[i], - 
srcLine[i], - 0, - dstColorLine, - dstLayerID); + this->_RenderPixel<NDSColorFormat_BGR555_Rev, false, ISDEBUGRENDER, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT>(_gpuDstToSrcIndex[i], + srcLine[i], + 0, + srcLayerID, + dstColorLine, + dstLayerID); } } @@ -3188,14 +3072,14 @@ // BACKGROUND RENDERING -TEXT- /*****************************************************************************/ // render a text background to the combined pixelbuffer -template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> -void GPUEngineBase::_RenderLine_BGText(u16 *__restrict dstColorLine, const u16 lineIndex, const u16 XBG, const u16 YBG) +template<bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> +void GPUEngineBase::_RenderLine_BGText(const GPULayerID srcLayerID, u16 *__restrict dstColorLine, const u16 ... [truncated message content] |
From: <rog...@us...> - 2016-07-14 05:49:50
|
Revision: 5505 http://sourceforge.net/p/desmume/code/5505 Author: rogerman Date: 2016-07-14 05:49:47 +0000 (Thu, 14 Jul 2016) Log Message: ----------- GPU: - Do some refactoring and code cleanup. Modified Paths: -------------- trunk/desmume/src/GPU.cpp trunk/desmume/src/GPU.h Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-13 23:14:38 UTC (rev 5504) +++ trunk/desmume/src/GPU.cpp 2016-07-14 05:49:47 UTC (rev 5505) @@ -475,9 +475,6 @@ this->_needUpdateWINH[0] = true; this->_needUpdateWINH[1] = true; - this->isCustomRenderingNeeded = false; - this->vramBGLayer = VRAM_NO_3D_USAGE; - this->vramBlockBGIndex = VRAM_NO_3D_USAGE; this->vramBlockOBJIndex = VRAM_NO_3D_USAGE; this->nativeLineRenderCount = GPU_FRAMEBUFFER_NATIVE_HEIGHT; @@ -4179,11 +4176,222 @@ } } +template <NDSColorFormat OUTPUTFORMAT> void* GPUEngineBase::_RenderLine_Layers(const u16 l) { - return ((u8 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU->GetDisplayInfo().pixelBytes)); + const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); + itemsForPriority_t *item; + + // Optimization: For normal display mode, render straight to the output buffer when that is what we are going to end + // up displaying anyway. Otherwise, we need to use the working buffer. + void *currentRenderLineTarget = (this->_displayOutputMode == GPUDisplayMode_Normal) ? 
(u8 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH * dispInfo.pixelBytes) : (u8 *)this->_internalRenderLineTargetNative; + + const u16 backdropColor = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF; + this->_RenderLine_Clear(backdropColor, l, currentRenderLineTarget); + + // for all the pixels in the line + if (this->_enableLayer[GPULayerID_OBJ]) + { + this->_RenderLine_SetupSprites(backdropColor, l); + } + + // paint lower priorities first + // then higher priorities on top + for (size_t prio = NB_PRIORITIES; prio > 0; ) + { + prio--; + item = &(this->_itemsForPriority[prio]); + // render BGs + if (this->_isAnyBGLayerEnabled) + { + for (size_t i = 0; i < item->nbBGs; i++) + { + const GPULayerID layerID = (GPULayerID)item->BGs[i]; + if (this->_enableLayer[layerID]) + { + if (this->_engineID == GPUEngineID_Main) + { + if ( (layerID == GPULayerID_BG0) && GPU->GetEngineMain()->WillRender3DLayer() ) + { + currentRenderLineTarget = GPU->GetEngineMain()->RenderLine_Layer3D<OUTPUTFORMAT>(currentRenderLineTarget, l); + continue; + } + } + + if (this->isLineRenderNative[l]) + { + switch (layerID) + { + case GPULayerID_BG0: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG0, false, false>(currentRenderLineTarget, l); break; + case GPULayerID_BG1: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG1, false, false>(currentRenderLineTarget, l); break; + case GPULayerID_BG2: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG2, false, false>(currentRenderLineTarget, l); break; + case GPULayerID_BG3: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG3, false, false>(currentRenderLineTarget, l); break; + + default: + break; + } + } + else + { + switch (layerID) + { + case GPULayerID_BG0: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG0, false, true>(currentRenderLineTarget, l); break; + case GPULayerID_BG1: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG1, false, 
true>(currentRenderLineTarget, l); break; + case GPULayerID_BG2: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG2, false, true>(currentRenderLineTarget, l); break; + case GPULayerID_BG3: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG3, false, true>(currentRenderLineTarget, l); break; + + default: + break; + } + } + } //layer enabled + } + } + + // render sprite Pixels + if ( this->_enableLayer[GPULayerID_OBJ] && (item->nbPixelsX > 0) ) + { + currentRenderLineTarget = this->_RenderLine_LayerOBJ<OUTPUTFORMAT>(item, currentRenderLineTarget, l); + } + } + + return currentRenderLineTarget; } +void GPUEngineBase::_RenderLine_SetupSprites(const u16 backdropColor, const u16 lineIndex) +{ + itemsForPriority_t *item; + + //n.b. - this is clearing the sprite line buffer to the background color, + memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(this->_sprColor, backdropColor); + memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH); + memset(this->_sprType, OBJMode_Normal, GPU_FRAMEBUFFER_NATIVE_WIDTH); + memset(this->_sprPrio, 0x7F, GPU_FRAMEBUFFER_NATIVE_WIDTH); + + //zero 06-may-09: I properly supported window color effects for backdrop, but I am not sure + //how it interacts with this. 
I wish we knew why we needed this + + this->_SpriteRender<false>(lineIndex, this->_sprColor, this->_sprAlpha, this->_sprType, this->_sprPrio); + this->_MosaicSpriteLine(lineIndex, this->_sprColor, this->_sprAlpha, this->_sprType, this->_sprPrio); + + for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++) + { + // assign them to the good priority item + const size_t prio = this->_sprPrio[i]; + if (prio >= 4) continue; + + item = &(this->_itemsForPriority[prio]); + item->PixelsX[item->nbPixelsX] = i; + item->nbPixelsX++; + } +} + +template <NDSColorFormat OUTPUTFORMAT> +void* GPUEngineBase::_RenderLine_LayerOBJ(itemsForPriority_t *__restrict item, void *__restrict dstColorLine, const u16 lineIndex) +{ + const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); + const size_t customLineWidth = dispInfo.customWidth; + const size_t customLineCount = _gpuDstLineCount[lineIndex]; + const size_t customLineIndex = _gpuDstLineIndex[lineIndex]; + + if (this->vramBlockOBJIndex != VRAM_NO_3D_USAGE) + { + if (GPU->GetEngineMain()->VerifyVRAMLineDidChange(this->vramBlockOBJIndex, lineIndex)) + { + void *newRenderLineTarget = (this->_displayOutputMode == GPUDisplayMode_Normal) ? 
(u8 *)this->nativeBuffer + (lineIndex * GPU_FRAMEBUFFER_NATIVE_WIDTH * dispInfo.pixelBytes) : (u8 *)this->_internalRenderLineTargetNative; + + switch (OUTPUTFORMAT) + { + case NDSColorFormat_BGR555_Rev: + this->_LineColorCopy<true, false, false, false, 2>(newRenderLineTarget, dstColorLine, lineIndex); + break; + + case NDSColorFormat_BGR666_Rev: + case NDSColorFormat_BGR888_Rev: + this->_LineColorCopy<true, false, false, false, 4>(newRenderLineTarget, dstColorLine, lineIndex); + break; + } + + this->_LineLayerIDCopy<true, false>(this->_renderLineLayerIDNative, this->_renderLineLayerIDCustom, lineIndex); + dstColorLine = newRenderLineTarget; + } + } + + const bool useCustomVRAM = (this->vramBlockOBJIndex != VRAM_NO_3D_USAGE) && !GPU->GetEngineMain()->isLineCaptureNative[this->vramBlockOBJIndex][lineIndex]; + const u16 *__restrict srcLine = (useCustomVRAM) ? GPU->GetEngineMain()->GetCustomVRAMBlockPtr(this->vramBlockOBJIndex) + (customLineIndex * customLineWidth) : NULL; + if (this->isLineRenderNative[lineIndex] && useCustomVRAM) + { + void *newRenderLineTarget = (this->_displayOutputMode == GPUDisplayMode_Normal) ? 
(u8 *)this->customBuffer + (customLineIndex * customLineWidth * dispInfo.pixelBytes) : (u8 *)this->_internalRenderLineTargetCustom; + + switch (OUTPUTFORMAT) + { + case NDSColorFormat_BGR555_Rev: + this->_LineColorCopy<false, true, false, false, 2>(newRenderLineTarget, dstColorLine, lineIndex); + break; + + case NDSColorFormat_BGR666_Rev: + case NDSColorFormat_BGR888_Rev: + this->_LineColorCopy<false, true, false, false, 4>(newRenderLineTarget, dstColorLine, lineIndex); + break; + } + + this->_LineLayerIDCopy<false, true>(this->_renderLineLayerIDCustom, this->_renderLineLayerIDNative, lineIndex); + dstColorLine = newRenderLineTarget; + + this->isLineRenderNative[lineIndex] = false; + this->nativeLineRenderCount--; + } + + u16 *__restrict dstColorLine16 = (u16 *)dstColorLine; + FragmentColor *__restrict dstColorLine32 = (FragmentColor *)dstColorLine; + + if (this->isLineRenderNative[lineIndex]) + { + u8 *__restrict dstLayerIDPtr = this->_renderLineLayerIDNative; + + for (size_t i = 0; i < item->nbPixelsX; i++) + { + const size_t srcX = item->PixelsX[i]; + + this->_RenderPixel<OUTPUTFORMAT, GPULayerID_OBJ, false, false, false>(srcX, + this->_sprColor[srcX], + this->_sprAlpha[srcX], + (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)(dstColorLine16 + srcX) : (void *)(dstColorLine32 + srcX), + dstLayerIDPtr + srcX); + } + } + else + { + u8 *__restrict dstLayerIDPtr = this->_renderLineLayerIDCustom; + + for (size_t line = 0; line < customLineCount; line++) + { + for (size_t i = 0; i < item->nbPixelsX; i++) + { + const size_t srcX = item->PixelsX[i]; + + for (size_t p = 0; p < _gpuDstPitchCount[srcX]; p++) + { + const size_t dstX = _gpuDstPitchIndex[srcX] + p; + + this->_RenderPixel<OUTPUTFORMAT, GPULayerID_OBJ, false, false, false>(srcX, + (useCustomVRAM) ? srcLine[dstX] : this->_sprColor[srcX], + this->_sprAlpha[srcX], + (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? 
(void *)(dstColorLine16 + dstX) : (void *)(dstColorLine32 + dstX), + dstLayerIDPtr + dstX); + } + } + + srcLine += customLineWidth; + dstColorLine16 += customLineWidth; + dstColorLine32 += customLineWidth; + dstLayerIDPtr += customLineWidth; + } + } + + return dstColorLine; +} + template <bool ISFULLINTENSITYHINT> void GPUEngineBase::ApplyMasterBrightness() { @@ -4507,50 +4715,6 @@ } } -void GPUEngineBase::UpdateVRAM3DUsageProperties_BGLayer(const size_t bankIndex) -{ - const bool isBG2UsingVRAM = this->_enableLayer[GPULayerID_BG2] && (this->_BGLayer[GPULayerID_BG2].type == BGType_AffineExt_Direct) && (this->_BGLayer[GPULayerID_BG2].size.width == 256) && (this->_BGLayer[GPULayerID_BG2].size.height == 256); - const bool isBG3UsingVRAM = this->_enableLayer[GPULayerID_BG3] && (this->_BGLayer[GPULayerID_BG3].type == BGType_AffineExt_Direct) && (this->_BGLayer[GPULayerID_BG3].size.width == 256) && (this->_BGLayer[GPULayerID_BG3].size.height == 256); - u8 selectedBGLayer = VRAM_NO_3D_USAGE; - - if (!isBG2UsingVRAM && !isBG3UsingVRAM) - { - return; - } - else if (!isBG2UsingVRAM && isBG3UsingVRAM) - { - selectedBGLayer = GPULayerID_BG3; - } - else if (isBG2UsingVRAM && !isBG3UsingVRAM) - { - selectedBGLayer = GPULayerID_BG2; - } - else if (isBG2UsingVRAM && isBG3UsingVRAM) - { - selectedBGLayer = (this->_BGLayer[GPULayerID_BG3].priority <= this->_BGLayer[GPULayerID_BG2].priority) ? GPULayerID_BG3 : GPULayerID_BG2; - } - - if (selectedBGLayer != VRAM_NO_3D_USAGE) - { - const IOREG_BGnParameter *bgParams = (selectedBGLayer == GPULayerID_BG2) ? (IOREG_BGnParameter *)&this->_IORegisterMap->BG2Param : (IOREG_BGnParameter *)&this->_IORegisterMap->BG3Param; - const IOREG_BGnX &savedBGnX = (selectedBGLayer == GPULayerID_BG2) ? this->savedBG2X : this->savedBG3X; - const IOREG_BGnY &savedBGnY = (selectedBGLayer == GPULayerID_BG2) ? 
this->savedBG2Y : this->savedBG3Y; - - if ( (bgParams->BGnPA.value != 0x100) || - (bgParams->BGnPB.value != 0) || - (bgParams->BGnPC.value != 0) || - (bgParams->BGnPD.value != 0x100) || - (savedBGnX.value != 0) || - (savedBGnY.value != 0) ) - { - selectedBGLayer = VRAM_NO_3D_USAGE; - } - } - - this->vramBGLayer = selectedBGLayer; - this->vramBlockBGIndex = (selectedBGLayer != VRAM_NO_3D_USAGE) ? bankIndex : VRAM_NO_3D_USAGE; -} - void GPUEngineBase::UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex) { const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; @@ -5221,7 +5385,7 @@ this->_SetupWindows<1>(l); // Render the line - void *renderLineTarget = this->_RenderLine_Layers(l); + void *renderLineTarget = this->_RenderLine_Layers<NDSColorFormat_BGR555_Rev>(l); // Fill the display output switch (this->_displayOutputMode) @@ -5261,457 +5425,226 @@ } } -void* GPUEngineA::_RenderLine_Layers(const u16 l) +template <NDSColorFormat OUTPUTFORMAT> +void* GPUEngineA::RenderLine_Layer3D(void *dstColorLine, const u16 lineIndex) { + const FragmentColor *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer(); + if (framebuffer3D == NULL) + { + return dstColorLine; + } + const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); const size_t customLineWidth = dispInfo.customWidth; - const size_t customLineCount = _gpuDstLineCount[l]; - const size_t customLineIndex = _gpuDstLineIndex[l]; + const size_t customLineCount = _gpuDstLineCount[lineIndex]; + const size_t customLineIndex = _gpuDstLineIndex[lineIndex]; - // Optimization: For normal display mode, render straight to the output buffer when that is what we are going to end - // up displaying anyway. Otherwise, we need to use the working buffer. - const bool isDisplayModeNormal = (this->_displayOutputMode == GPUDisplayMode_Normal); - - void *renderLineTargetNative = (isDisplayModeNormal) ? 
(u8 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH * dispInfo.pixelBytes) : (u8 *)this->_internalRenderLineTargetNative; - void *renderLineTargetCustom = (isDisplayModeNormal) ? (u8 *)this->customBuffer + (customLineIndex * customLineWidth * dispInfo.pixelBytes) : (u8 *)this->_internalRenderLineTargetCustom; - void *currentRenderLineTarget = renderLineTargetNative; - - const u16 backdropColor = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF; - this->_RenderLine_Clear(backdropColor, l, renderLineTargetNative); - - itemsForPriority_t *__restrict item; - - // for all the pixels in the line - if (this->_enableLayer[GPULayerID_OBJ]) + if (this->isLineRenderNative[lineIndex] && !CurrentRenderer->IsFramebufferNativeSize()) { - //n.b. - this is clearing the sprite line buffer to the background color, - memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(this->_sprColor, backdropColor); - memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH); - memset(this->_sprType, OBJMode_Normal, GPU_FRAMEBUFFER_NATIVE_WIDTH); - memset(this->_sprPrio, 0x7F, GPU_FRAMEBUFFER_NATIVE_WIDTH); + void *newRenderLineTarget = (this->_displayOutputMode == GPUDisplayMode_Normal) ? (u8 *)this->customBuffer + (customLineIndex * customLineWidth * dispInfo.pixelBytes) : (u8 *)this->_internalRenderLineTargetCustom; - //zero 06-may-09: I properly supported window color effects for backdrop, but I am not sure - //how it interacts with this. 
I wish we knew why we needed this + switch (OUTPUTFORMAT) + { + case NDSColorFormat_BGR555_Rev: + this->_LineColorCopy<false, true, false, false, 2>(newRenderLineTarget, dstColorLine, lineIndex); + break; + + case NDSColorFormat_BGR666_Rev: + case NDSColorFormat_BGR888_Rev: + this->_LineColorCopy<false, true, false, false, 4>(newRenderLineTarget, dstColorLine, lineIndex); + break; + } - this->_SpriteRender<false>(l, this->_sprColor, this->_sprAlpha, this->_sprType, this->_sprPrio); - this->_MosaicSpriteLine(l, this->_sprColor, this->_sprAlpha, this->_sprType, this->_sprPrio); + this->_LineLayerIDCopy<false, true>(this->_renderLineLayerIDCustom, this->_renderLineLayerIDNative, lineIndex); + dstColorLine = newRenderLineTarget; - for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++) - { - // assign them to the good priority item - const size_t prio = this->_sprPrio[i]; - if (prio >= 4) continue; - - item = &(this->_itemsForPriority[prio]); - item->PixelsX[item->nbPixelsX] = i; - item->nbPixelsX++; - } + this->isLineRenderNative[lineIndex] = false; + this->nativeLineRenderCount--; } - // paint lower priorities first - // then higher priorities on top - for (size_t prio = NB_PRIORITIES; prio > 0; ) + const float customWidthScale = (float)customLineWidth / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH; + const FragmentColor *__restrict srcLinePtr = framebuffer3D + (customLineIndex * customLineWidth); + void *__restrict dstColorLinePtr = dstColorLine; + u8 *__restrict dstLayerIDPtr = (this->isLineRenderNative[lineIndex]) ? this->_renderLineLayerIDNative : this->_renderLineLayerIDCustom; + + // Horizontally offset the 3D layer by this amount. + // Test case: Blowing up large objects in Nanostray 2 will cause the main screen to shake horizontally. 
+ const u16 hofs = (u16)( ((float)this->_BGLayer[GPULayerID_BG0].xOffset * customWidthScale) + 0.5f ); + + if (hofs == 0) { - prio--; - item = &(this->_itemsForPriority[prio]); - // render BGs - if (this->_isAnyBGLayerEnabled) + for (size_t line = 0; line < customLineCount; line++) { - for (size_t i = 0; i < item->nbBGs; i++) + size_t dstX = 0; +#ifdef ENABLE_SSE2 + const size_t ssePixCount = customLineWidth - (customLineWidth % 16); + + for (; dstX < ssePixCount; dstX+=16, srcLinePtr+=16, dstLayerIDPtr+=16, dstColorLinePtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLinePtr + 16) : (void *)((FragmentColor *)dstColorLinePtr + 16)) { - const GPULayerID layerID = (GPULayerID)item->BGs[i]; - if (this->_enableLayer[layerID]) + const __m128i src[4] = { _mm_load_si128((__m128i *)srcLinePtr + 0), + _mm_load_si128((__m128i *)srcLinePtr + 1), + _mm_load_si128((__m128i *)srcLinePtr + 2), + _mm_load_si128((__m128i *)srcLinePtr + 3) }; + + // Determine which pixels pass by doing the alpha test and the window test. + const __m128i srcAlpha = _mm_packs_epi16( _mm_packs_epi32(_mm_srli_epi32(src[0], 24), _mm_srli_epi32(src[1], 24)), + _mm_packs_epi32(_mm_srli_epi32(src[2], 24), _mm_srli_epi32(src[3], 24)) ); + + // Do the window test. + __m128i passMask8; + __m128i enableColorEffectMask; + this->_RenderPixel_CheckWindows16_SSE2<GPULayerID_BG0, true>(dstX, passMask8, enableColorEffectMask); + + // Do the alpha test. Pixels with an alpha value of 0 are rejected. + passMask8 = _mm_andnot_si128(_mm_cmpeq_epi8(srcAlpha, _mm_setzero_si128()), passMask8); + + // If none of the pixels within the vector pass, then reject them all at once. 
+ if (_mm_movemask_epi8(passMask8) == 0) { - if ( (layerID == GPULayerID_BG0) && this->WillRender3DLayer() ) - { - const FragmentColor *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer(); - if (framebuffer3D == NULL) - { - continue; - } - - if (this->isLineRenderNative[l] && !CurrentRenderer->IsFramebufferNativeSize()) - { - void *newRenderLineTarget = renderLineTargetCustom; - - switch (dispInfo.colorFormat) - { - case NDSColorFormat_BGR555_Rev: - this->_LineColorCopy<false, true, false, false, 2>(newRenderLineTarget, currentRenderLineTarget, l); - break; - - case NDSColorFormat_BGR666_Rev: - case NDSColorFormat_BGR888_Rev: - this->_LineColorCopy<false, true, false, false, 4>(newRenderLineTarget, currentRenderLineTarget, l); - break; - } - - this->_LineLayerIDCopy<false, true>(this->_renderLineLayerIDCustom, this->_renderLineLayerIDNative, l); - currentRenderLineTarget = newRenderLineTarget; - - this->isLineRenderNative[l] = false; - this->nativeLineRenderCount--; - } - - const float customWidthScale = (float)customLineWidth / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH; - const FragmentColor *__restrict srcLinePtr = framebuffer3D + (customLineIndex * customLineWidth); - void *__restrict dstColorLinePtr = currentRenderLineTarget; - u8 *__restrict dstLayerIDPtr = (this->isLineRenderNative[l]) ? this->_renderLineLayerIDNative : this->_renderLineLayerIDCustom; - - // Horizontally offset the 3D layer by this amount. - // Test case: Blowing up large objects in Nanostray 2 will cause the main screen to shake horizontally. - const u16 hofs = (u16)( ((float)this->_BGLayer[GPULayerID_BG0].xOffset * customWidthScale) + 0.5f ); - - if (hofs == 0) - { - for (size_t line = 0; line < customLineCount; line++) - { - size_t dstX = 0; -#ifdef ENABLE_SSE2 - const size_t ssePixCount = customLineWidth - (customLineWidth % 16); - - for (; dstX < ssePixCount; dstX+=16, srcLinePtr+=16, dstLayerIDPtr+=16, dstColorLinePtr = (dispInfo.colorFormat == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLinePtr + 16) : (void *)((FragmentColor *)dstColorLinePtr + 16)) - { - const __m128i src[4] = { _mm_load_si128((__m128i *)srcLinePtr + 0), - _mm_load_si128((__m128i *)srcLinePtr + 1), - _mm_load_si128((__m128i *)srcLinePtr + 2), - _mm_load_si128((__m128i *)srcLinePtr + 3) }; - - // Determine which pixels pass by doing the alpha test and the window test. - const __m128i srcAlpha = _mm_packs_epi16( _mm_packs_epi32(_mm_srli_epi32(src[0], 24), _mm_srli_epi32(src[1], 24)), - _mm_packs_epi32(_mm_srli_epi32(src[2], 24), _mm_srli_epi32(src[3], 24)) ); - - // Do the window test. - __m128i passMask8; - __m128i enableColorEffectMask; - this->_RenderPixel_CheckWindows16_SSE2<GPULayerID_BG0, true>(dstX, passMask8, enableColorEffectMask); - - // Do the alpha test. Pixels with an alpha value of 0 are rejected. - passMask8 = _mm_andnot_si128(_mm_cmpeq_epi8(srcAlpha, _mm_setzero_si128()), passMask8); - - // If none of the pixels within the vector pass, then reject them all at once. - if (_mm_movemask_epi8(passMask8) == 0) - { - continue; - } - - // Perform the blending function. - __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)dstLayerIDPtr); - - __m128i dst[4]; - dst[0] = _mm_load_si128((__m128i *)dstColorLinePtr + 0); - dst[1] = _mm_load_si128((__m128i *)dstColorLinePtr + 1); - - if (dispInfo.colorFormat == NDSColorFormat_BGR555_Rev) - { - // Instead of letting these vectors go to waste, let's convert the src colors to 16-bit now and - // then pack the converted 16-bit colors into these vectors. 
- dst[2] = _mm_packs_epi32( _mm_or_si128(_mm_or_si128(_mm_srli_epi32(_mm_and_si128(src[0], _mm_set1_epi32(0x0000003E)), 1), _mm_srli_epi32(_mm_and_si128(src[0], _mm_set1_epi32(0x00003E00)), 4)), _mm_srli_epi32(_mm_and_si128(src[0], _mm_set1_epi32(0x003E0000)), 7)), - _mm_or_si128(_mm_or_si128(_mm_srli_epi32(_mm_and_si128(src[1], _mm_set1_epi32(0x0000003E)), 1), _mm_srli_epi32(_mm_and_si128(src[1], _mm_set1_epi32(0x00003E00)), 4)), _mm_srli_epi32(_mm_and_si128(src[1], _mm_set1_epi32(0x003E0000)), 7)) ); - dst[3] = _mm_packs_epi32( _mm_or_si128(_mm_or_si128(_mm_srli_epi32(_mm_and_si128(src[2], _mm_set1_epi32(0x0000003E)), 1), _mm_srli_epi32(_mm_and_si128(src[2], _mm_set1_epi32(0x00003E00)), 4)), _mm_srli_epi32(_mm_and_si128(src[2], _mm_set1_epi32(0x003E0000)), 7)), - _mm_or_si128(_mm_or_si128(_mm_srli_epi32(_mm_and_si128(src[3], _mm_set1_epi32(0x0000003E)), 1), _mm_srli_epi32(_mm_and_si128(src[3], _mm_set1_epi32(0x00003E00)), 4)), _mm_srli_epi32(_mm_and_si128(src[3], _mm_set1_epi32(0x003E0000)), 7)) ); - } - else - { - dst[2] = _mm_load_si128((__m128i *)dstColorLinePtr + 2); - dst[3] = _mm_load_si128((__m128i *)dstColorLinePtr + 3); - } - - switch (dispInfo.colorFormat) - { - case NDSColorFormat_BGR555_Rev: - { - this->_RenderPixel3D_SSE2<NDSColorFormat_BGR555_Rev>(passMask8, - enableColorEffectMask, - src[3], src[2], src[1], src[0], - dst[3], dst[2], dst[1], dst[0], - dstLayerID_vec128); - break; - } - - case NDSColorFormat_BGR666_Rev: - { - this->_RenderPixel3D_SSE2<NDSColorFormat_BGR666_Rev>(passMask8, - enableColorEffectMask, - src[3], src[2], src[1], src[0], - dst[3], dst[2], dst[1], dst[0], - dstLayerID_vec128); - break; - } - - case NDSColorFormat_BGR888_Rev: - { - this->_RenderPixel3D_SSE2<NDSColorFormat_BGR888_Rev>(passMask8, - enableColorEffectMask, - src[3], src[2], src[1], src[0], - dst[3], dst[2], dst[1], dst[0], - dstLayerID_vec128); - break; - } - } - - _mm_store_si128((__m128i *)dstColorLinePtr + 0, dst[0]); - _mm_store_si128((__m128i 
*)dstColorLinePtr + 1, dst[1]); - - if (dispInfo.colorFormat != NDSColorFormat_BGR555_Rev) - { - _mm_store_si128((__m128i *)dstColorLinePtr + 2, dst[2]); - _mm_store_si128((__m128i *)dstColorLinePtr + 3, dst[3]); - } - - _mm_store_si128((__m128i *)dstLayerIDPtr, dstLayerID_vec128); - } + continue; + } + + // Perform the blending function. + __m128i dstLayerID_vec128 = _mm_load_si128((__m128i *)dstLayerIDPtr); + + __m128i dst[4]; + dst[0] = _mm_load_si128((__m128i *)dstColorLinePtr + 0); + dst[1] = _mm_load_si128((__m128i *)dstColorLinePtr + 1); + + if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) + { + // Instead of letting these vectors go to waste, let's convert the src colors to 16-bit now and + // then pack the converted 16-bit colors into these vectors. + dst[2] = _mm_packs_epi32( _mm_or_si128(_mm_or_si128(_mm_srli_epi32(_mm_and_si128(src[0], _mm_set1_epi32(0x0000003E)), 1), _mm_srli_epi32(_mm_and_si128(src[0], _mm_set1_epi32(0x00003E00)), 4)), _mm_srli_epi32(_mm_and_si128(src[0], _mm_set1_epi32(0x003E0000)), 7)), + _mm_or_si128(_mm_or_si128(_mm_srli_epi32(_mm_and_si128(src[1], _mm_set1_epi32(0x0000003E)), 1), _mm_srli_epi32(_mm_and_si128(src[1], _mm_set1_epi32(0x00003E00)), 4)), _mm_srli_epi32(_mm_and_si128(src[1], _mm_set1_epi32(0x003E0000)), 7)) ); + dst[3] = _mm_packs_epi32( _mm_or_si128(_mm_or_si128(_mm_srli_epi32(_mm_and_si128(src[2], _mm_set1_epi32(0x0000003E)), 1), _mm_srli_epi32(_mm_and_si128(src[2], _mm_set1_epi32(0x00003E00)), 4)), _mm_srli_epi32(_mm_and_si128(src[2], _mm_set1_epi32(0x003E0000)), 7)), + _mm_or_si128(_mm_or_si128(_mm_srli_epi32(_mm_and_si128(src[3], _mm_set1_epi32(0x0000003E)), 1), _mm_srli_epi32(_mm_and_si128(src[3], _mm_set1_epi32(0x00003E00)), 4)), _mm_srli_epi32(_mm_and_si128(src[3], _mm_set1_epi32(0x003E0000)), 7)) ); + } + else + { + dst[2] = _mm_load_si128((__m128i *)dstColorLinePtr + 2); + dst[3] = _mm_load_si128((__m128i *)dstColorLinePtr + 3); + } + + this->_RenderPixel3D_SSE2<OUTPUTFORMAT>(passMask8, + enableColorEffectMask, 
+ src[3], src[2], src[1], src[0], + dst[3], dst[2], dst[1], dst[0], + dstLayerID_vec128); + + _mm_store_si128((__m128i *)dstColorLinePtr + 0, dst[0]); + _mm_store_si128((__m128i *)dstColorLinePtr + 1, dst[1]); + + if (OUTPUTFORMAT != NDSColorFormat_BGR555_Rev) + { + _mm_store_si128((__m128i *)dstColorLinePtr + 2, dst[2]); + _mm_store_si128((__m128i *)dstColorLinePtr + 3, dst[3]); + } + + _mm_store_si128((__m128i *)dstLayerIDPtr, dstLayerID_vec128); + } #endif - + #ifdef ENABLE_SSE2 #pragma LOOPVECTORIZE_DISABLE #endif - - for (; dstX < customLineWidth; dstX++, srcLinePtr++, dstLayerIDPtr++, dstColorLinePtr = (dispInfo.colorFormat == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLinePtr + 1) : (void *)((FragmentColor *)dstColorLinePtr + 1)) - { - if (srcLinePtr->a == 0) - { - continue; - } - - bool didPassWindowTest = true; - bool enableColorEffect = true; - - this->_RenderPixel_CheckWindows<GPULayerID_BG0>(_gpuDstToSrcIndex[dstX], didPassWindowTest, enableColorEffect); - - if (!didPassWindowTest) - { - continue; - } - - switch (dispInfo.colorFormat) - { - case NDSColorFormat_BGR555_Rev: - { - this->_RenderPixel3D(*srcLinePtr, - *(u16 *)dstColorLinePtr, - *dstLayerIDPtr, - enableColorEffect); - break; - } - - case NDSColorFormat_BGR666_Rev: - { - this->_RenderPixel3D<NDSColorFormat_BGR666_Rev>(*srcLinePtr, - *(FragmentColor *)dstColorLinePtr, - *dstLayerIDPtr, - enableColorEffect); - break; - } - - case NDSColorFormat_BGR888_Rev: - { - this->_RenderPixel3D<NDSColorFormat_BGR888_Rev>(*srcLinePtr, - *(FragmentColor *)dstColorLinePtr, - *dstLayerIDPtr, - enableColorEffect); - break; - } - } - } - } - } - else - { - for (size_t line = 0; line < customLineCount; line++) - { - for (size_t dstX = 0; dstX < customLineWidth; dstX++, dstLayerIDPtr++, dstColorLinePtr = (dispInfo.colorFormat == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLinePtr + 1) : (void *)((FragmentColor *)dstColorLinePtr + 1)) - { - size_t srcX = dstX + hofs; - if (srcX >= customLineWidth * 2) - { - srcX -= customLineWidth * 2; - } - - if (srcX >= customLineWidth || srcLinePtr[srcX].a == 0) - { - continue; - } - - bool didPassWindowTest = true; - bool enableColorEffect = true; - - this->_RenderPixel_CheckWindows<GPULayerID_BG0>(_gpuDstToSrcIndex[dstX], didPassWindowTest, enableColorEffect); - - if (!didPassWindowTest) - { - continue; - } - - switch (dispInfo.colorFormat) - { - case NDSColorFormat_BGR555_Rev: - { - this->_RenderPixel3D(srcLinePtr[srcX], - *(u16 *)dstColorLinePtr, - *dstLayerIDPtr, - enableColorEffect); - break; - } - - case NDSColorFormat_BGR666_Rev: - { - this->_RenderPixel3D<NDSColorFormat_BGR666_Rev>(srcLinePtr[srcX], - *(FragmentColor *)dstColorLinePtr, - *dstLayerIDPtr, - enableColorEffect); - break; - } - - case NDSColorFormat_BGR888_Rev: - { - this->_RenderPixel3D<NDSColorFormat_BGR888_Rev>(srcLinePtr[srcX], - *(FragmentColor *)dstColorLinePtr, - *dstLayerIDPtr, - enableColorEffect); - break; - } - } - } - - srcLinePtr += customLineWidth; - } - } - - continue; - } - - if (this->isLineRenderNative[l]) + + for (; dstX < customLineWidth; dstX++, srcLinePtr++, dstLayerIDPtr++, dstColorLinePtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? 
(void *)((u16 *)dstColorLinePtr + 1) : (void *)((FragmentColor *)dstColorLinePtr + 1)) + { + if (srcLinePtr->a == 0) + { + continue; + } + + bool didPassWindowTest = true; + bool enableColorEffect = true; + + this->_RenderPixel_CheckWindows<GPULayerID_BG0>(_gpuDstToSrcIndex[dstX], didPassWindowTest, enableColorEffect); + + if (!didPassWindowTest) + { + continue; + } + + switch (OUTPUTFORMAT) + { + case NDSColorFormat_BGR555_Rev: { - switch (layerID) - { - case GPULayerID_BG0: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG0, false, false>(currentRenderLineTarget, l); break; - case GPULayerID_BG1: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG1, false, false>(currentRenderLineTarget, l); break; - case GPULayerID_BG2: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG2, false, false>(currentRenderLineTarget, l); break; - case GPULayerID_BG3: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG3, false, false>(currentRenderLineTarget, l); break; - - default: - break; - } + this->_RenderPixel3D(*srcLinePtr, + *(u16 *)dstColorLinePtr, + *dstLayerIDPtr, + enableColorEffect); + break; } - else + + case NDSColorFormat_BGR666_Rev: + case NDSColorFormat_BGR888_Rev: { - switch (layerID) - { - case GPULayerID_BG0: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG0, false, true>(currentRenderLineTarget, l); break; - case GPULayerID_BG1: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG1, false, true>(currentRenderLineTarget, l); break; - case GPULayerID_BG2: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG2, false, true>(currentRenderLineTarget, l); break; - case GPULayerID_BG3: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG3, false, true>(currentRenderLineTarget, l); break; - - default: - break; - } + this->_RenderPixel3D<OUTPUTFORMAT>(*srcLinePtr, + *(FragmentColor *)dstColorLinePtr, + *dstLayerIDPtr, + enableColorEffect); + break; } - } //layer 
enabled + } } } - - // render sprite Pixels - if ( this->_enableLayer[GPULayerID_OBJ] && (item->nbPixelsX > 0) ) + } + else + { + for (size_t line = 0; line < customLineCount; line++) { - if (this->vramBlockOBJIndex != VRAM_NO_3D_USAGE) + for (size_t dstX = 0; dstX < customLineWidth; dstX++, dstLayerIDPtr++, dstColorLinePtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorLinePtr + 1) : (void *)((FragmentColor *)dstColorLinePtr + 1)) { - if (GPU->GetEngineMain()->VerifyVRAMLineDidChange(this->vramBlockOBJIndex, l)) + size_t srcX = dstX + hofs; + if (srcX >= customLineWidth * 2) { - void *newRenderLineTarget = (void *)renderLineTargetNative; - - switch (dispInfo.colorFormat) - { - case NDSColorFormat_BGR555_Rev: - this->_LineColorCopy<true, false, false, false, 2>(newRenderLineTarget, currentRenderLineTarget, l); - break; - - case NDSColorFormat_BGR666_Rev: - case NDSColorFormat_BGR888_Rev: - this->_LineColorCopy<true, false, false, false, 4>(newRenderLineTarget, currentRenderLineTarget, l); - break; - } - - this->_LineLayerIDCopy<true, false>(this->_renderLineLayerIDNative, this->_renderLineLayerIDCustom, l); - currentRenderLineTarget = newRenderLineTarget; + srcX -= customLineWidth * 2; } - } - - const bool useCustomVRAM = (this->vramBlockOBJIndex != VRAM_NO_3D_USAGE) && !GPU->GetEngineMain()->isLineCaptureNative[this->vramBlockOBJIndex][l]; - const u16 *__restrict srcLine = (useCustomVRAM) ? 
GPU->GetEngineMain()->GetCustomVRAMBlockPtr(this->vramBlockOBJIndex) + (customLineIndex * customLineWidth) : NULL; - if (this->isLineRenderNative[l] && useCustomVRAM) - { - void *newRenderLineTarget = renderLineTargetCustom; - switch (dispInfo.colorFormat) + if (srcX >= customLineWidth || srcLinePtr[srcX].a == 0) { - case NDSColorFormat_BGR555_Rev: - this->_LineColorCopy<false, true, false, false, 2>(newRenderLineTarget, currentRenderLineTarget, l); - break; - - case NDSColorFormat_BGR666_Rev: - case NDSColorFormat_BGR888_Rev: - this->_LineColorCopy<false, true, false, false, 4>(newRenderLineTarget, currentRenderLineTarget, l); - break; + continue; } - this->_LineLayerIDCopy<false, true>(this->_renderLineLayerIDCustom, this->_renderLineLayerIDNative, l); - currentRenderLineTarget = newRenderLineTarget; + bool didPassWindowTest = true; + bool enableColorEffect = true; - this->isLineRenderNative[l] = false; - this->nativeLineRenderCount--; - } - - u16 *__restrict dstColorLine16 = (u16 *)currentRenderLineTarget; - FragmentColor *__restrict dstColorLine32 = (FragmentColor *)currentRenderLineTarget; - - if (this->isLineRenderNative[l]) - { - u8 *__restrict dstLayerIDPtr = this->_renderLineLayerIDNative; + this->_RenderPixel_CheckWindows<GPULayerID_BG0>(_gpuDstToSrcIndex[dstX], didPassWindowTest, enableColorEffect); - for (size_t i = 0; i < item->nbPixelsX; i++) + if (!didPassWindowTest) { - const size_t srcX = item->PixelsX[i]; - - this->_RenderPixel<NDSColorFormat_BGR555_Rev, GPULayerID_OBJ, false, false, false>(srcX, - this->_sprColor[srcX], - this->_sprAlpha[srcX], - (dispInfo.colorFormat == NDSColorFormat_BGR555_Rev) ? 
(void *)(dstColorLine16 + srcX) : (void *)(dstColorLine32 + srcX), - dstLayerIDPtr + srcX); + continue; } - } - else - { - u8 *__restrict dstLayerIDPtr = this->_renderLineLayerIDCustom; - for (size_t line = 0; line < customLineCount; line++) + switch (OUTPUTFORMAT) { - for (size_t i = 0; i < item->nbPixelsX; i++) + case NDSColorFormat_BGR555_Rev: { - const size_t srcX = item->PixelsX[i]; + this->_RenderPixel3D(srcLinePtr[srcX], + *(u16 *)dstColorLinePtr, + *dstLayerIDPtr, + enableColorEffect); + break; + } - for (size_t p = 0; p < _gpuDstPitchCount[srcX]; p++) - { - const size_t dstX = _gpuDstPitchIndex[srcX] + p; - - this->_RenderPixel<NDSColorFormat_BGR555_Rev, GPULayerID_OBJ, false, false, false>(srcX, - (useCustomVRAM) ? srcLine[dstX] : this->_sprColor[srcX], - this->_sprAlpha[srcX], - (dispInfo.colorFormat == NDSColorFormat_BGR555_Rev) ? (void *)(dstColorLine16 + dstX) : (void *)(dstColorLine32 + dstX), - dstLayerIDPtr + dstX); - } + case NDSColorFormat_BGR666_Rev: + case NDSColorFormat_BGR888_Rev: + { + this->_RenderPixel3D<OUTPUTFORMAT>(srcLinePtr[srcX], + *(FragmentColor *)dstColorLinePtr, + *dstLayerIDPtr, + enableColorEffect); + break; } - - srcLine += customLineWidth; - dstColorLine16 += customLineWidth; - dstColorLine32 += customLineWidth; - dstLayerIDPtr += customLineWidth; } } + + srcLinePtr += customLineWidth; } } - return currentRenderLineTarget; + return dstColorLine; } template<size_t CAPTURELENGTH> @@ -6902,7 +6835,7 @@ break; case GPUDisplayMode_Normal: // Display BG and OBJ layers - this->_RenderLine_Layers(l); + this->_RenderLine_Layers<NDSColorFormat_BGR555_Rev>(l); this->_HandleDisplayModeNormal(l); break; @@ -6911,197 +6844,6 @@ } } -void* GPUEngineB::_RenderLine_Layers(const u16 l) -{ - const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); - const size_t customLineWidth = dispInfo.customWidth; - const size_t customLineCount = _gpuDstLineCount[l]; - const size_t customLineIndex = _gpuDstLineIndex[l]; - - void *currentRenderLineTarget = 
(u8 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH * dispInfo.pixelBytes); - itemsForPriority_t *__restrict item; - - const u16 backdropColor = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF; - this->_RenderLine_Clear(backdropColor, l, currentRenderLineTarget); - - // for all the pixels in the line - if (this->_enableLayer[GPULayerID_OBJ]) - { - //n.b. - this is clearing the sprite line buffer to the background color, - memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(this->_sprColor, backdropColor); - memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH); - memset(this->_sprType, OBJMode_Normal, GPU_FRAMEBUFFER_NATIVE_WIDTH); - memset(this->_sprPrio, 0x7F, GPU_FRAMEBUFFER_NATIVE_WIDTH); - - //zero 06-may-09: I properly supported window color effects for backdrop, but I am not sure - //how it interacts with this. I wish we knew why we needed this - - this->_SpriteRender<false>(l, this->_sprColor, this->_sprAlpha, this->_sprType, this->_sprPrio); - this->_MosaicSpriteLine(l, this->_sprColor, this->_sprAlpha, this->_sprType, this->_sprPrio); - - for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++) - { - // assign them to the good priority item - const size_t prio = this->_sprPrio[i]; - if (prio >= 4) continue; - - item = &(this->_itemsForPriority[prio]); - item->PixelsX[item->nbPixelsX] = i; - item->nbPixelsX++; - } - } - - // paint lower priorities first - // then higher priorities on top - for (size_t prio = NB_PRIORITIES; prio > 0; ) - { - prio--; - item = &(this->_itemsForPriority[prio]); - // render BGs - if (this->_isAnyBGLayerEnabled) - { - for (size_t i = 0; i < item->nbBGs; i++) - { - const GPULayerID layerID = (GPULayerID)item->BGs[i]; - if (this->_enableLayer[layerID]) - { - if (this->isLineRenderNative[l]) - { - switch (layerID) - { - case GPULayerID_BG0: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG0, false, false>(currentRenderLineTarget, l); break; - case GPULayerID_BG1: currentRenderLineTarget = 
this->_RenderLine_LayerBG<GPULayerID_BG1, false, false>(currentRenderLineTarget, l); break; - case GPULayerID_BG2: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG2, false, false>(currentRenderLineTarget, l); break; - case GPULayerID_BG3: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG3, false, false>(currentRenderLineTarget, l); break; - - default: - break; - } - } - else - { - switch (layerID) - { - case GPULayerID_BG0: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG0, false, true>(currentRenderLineTarget, l); break; - case GPULayerID_BG1: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG1, false, true>(currentRenderLineTarget, l); break; - case GPULayerID_BG2: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG2, false, true>(currentRenderLineTarget, l); break; - case GPULayerID_BG3: currentRenderLineTarget = this->_RenderLine_LayerBG<GPULayerID_BG3, false, true>(currentRenderLineTarget, l); break; - - default: - break; - } - } - } //layer enabled - } - } - - // render sprite Pixels - if ( this->_enableLayer[GPULayerID_OBJ] && (item->nbPixelsX > 0) ) - { - if (this->vramBlockOBJIndex != VRAM_NO_3D_USAGE) - { - if (GPU->GetEngineMain()->VerifyVRAMLineDidChange(this->vramBlockOBJIndex, l)) - { - void *newRenderLineTarget; - - switch (dispInfo.colorFormat) - { - case NDSColorFormat_BGR555_Rev: - newRenderLineTarget = (u8 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); - this->_LineColorCopy<true, false, false, false, 2>(newRenderLineTarget, currentRenderLineTarget, l); - break; - - case NDSColorFormat_BGR666_Rev: - case NDSColorFormat_BGR888_Rev: - newRenderLineTarget = (u8 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(FragmentColor)); - this->_LineColorCopy<true, false, false, false, 4>(newRenderLineTarget, currentRenderLineTarget, l); - break; - } - - this->_LineLayerIDCopy<true, false>(this->_renderLineLayerIDNative, 
this->_renderLineLayerIDCustom, l); - currentRenderLineTarget = newRenderLineTarget; - } - } - - const bool useCustomVRAM = (this->vramBlockOBJIndex != VRAM_NO_3D_USAGE) && !GPU->GetEngineMain()->isLineCaptureNative[this->vramBlockOBJIndex][l]; - const u16 *__restrict srcLine = (useCustomVRAM) ? GPU->GetEngineMain()->GetCustomVRAMBlockPtr(this->vramBlockOBJIndex) + (customLineIndex * customLineWidth) : NULL; - if (this->isLineRenderNative[l] && useCustomVRAM) - { - void *newRenderLineTarget; - - switch (dispInfo.colorFormat) - { - case NDSColorFormat_BGR555_Rev: - newRenderLineTarget = (u8 *)this->customBuffer + (customLineIndex * customLineWidth * sizeof(u16)); - this->_LineColorCopy<false, true, false, false, 2>(newRenderLineTarget, currentRenderLineTarget, l); - break; - - case NDSColorFormat_BGR666_Rev: - case NDSColorFormat_BGR888_Rev: - newRenderLineTarget = (u8 *)this->customBuffer + (customLineIndex * customLineWidth * sizeof(FragmentColor)); - this->_LineColorCopy<false, true, false, false, 4>(newRenderLineTarget, currentRenderLineTarget, l); - break; - } - - this->_LineLayerIDCopy<false, true>(this->_renderLineLayerIDCustom, this->_renderLineLayerIDNative, l); - currentRenderLineTarget = newRenderLineTarget; - - this->isLineRenderNative[l] = false; - this->nativeLineRenderCount--; - } - - u16 *__restrict dstColorLine16 = (u16 *)currentRenderLineTarget; - FragmentColor *__restrict dstColorLine32 = (FragmentColor *)currentRenderLineTarget; - - if (this->isLineRenderNative[l]) - { - u8 *__restrict dstLayerIDPtr = this->_renderLineLayerIDNative; - - for (size_t i = 0; i < item->nbPixelsX; i++) - { - const size_t srcX = item->PixelsX[i]; - - this->_RenderPixel<NDSColorFormat_BGR555_Rev, GPULayerID_OBJ, false, false, false>(srcX, - this->_sprColor[srcX], - this->_sprAlpha[srcX], - (dispInfo.colorFormat == NDSColorFormat_BGR555_Rev) ? 
(void *)(dstColorLine16 + srcX) : (void *)(dstColorLine32 + srcX), - dstLayerIDPtr + srcX); - } - } - else - { - u8 *__restrict dstLayerIDPtr = this->_renderLineLayerIDCustom; - - for (size_t line = 0; line < customLineCount; line++) - { - for (size_t i = 0; i < item->nbPixelsX; i++) - { - const size_t srcX = item->PixelsX[i]; - - for (size_t p = 0; p < _gpuDstPitchCount[srcX]; p++) - { - const size_t dstX = _gpuDstPitchIndex[srcX] + p; - - this->_RenderPixel<NDSColorFormat_BGR555_Rev, GPULayerID_OBJ, false, false, false>(srcX, - (useCustomVRAM) ? srcLine[dstX] : this->_sprColor[srcX], - this->_sprAlpha[srcX], - (dispInfo.colorFormat == NDSColorFormat_BGR555_Rev) ? (void *)(dstColorLine16 + dstX) : (void *)(dstColorLine32 + dstX), - dstLayerIDPtr + dstX); - } - } - - srcLine += customLineWidth; - dstColorLine16 += customLineWidth; - dstColorLine32 += customLineWidth; - dstLayerIDPtr += customLineWidth; - } - } - } - } - - return currentRenderLineTarget; -} - GPUSubsystem::GPUSubsystem() { static bool needInitTables = true; @@ -7259,18 +7001,12 @@ void GPUSubsystem::UpdateRenderProperties() { - this->_engineMain->isCustomRenderingNeeded = false; - this->_engineMain->vramBlockBGIndex = VRAM_NO_3D_USAGE; this->_engineMain->vramBlockOBJIndex = VRAM_NO_3D_USAGE; - this->_engineMain->vramBGLayer = VRAM_NO_3D_USAGE; this->_engineMain->renderedWidth = GPU_FRAMEBUFFER_NATIVE_WIDTH; this->_engineMain->renderedHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT; this->_engineMain->renderedBuffer = this->_engineMain->nativeBuffer; - this->_engineSub->isCustomRenderingNeeded = false; - this->_engineSub->vramBlockBGIndex = VRAM_NO_3D_USAGE; this->_engineSub->vramBlockOBJIndex = VRAM_NO_3D_USAGE; - this->_engineSub->vramBGLayer = VRAM_NO_3D_USAGE; this->_engineSub->renderedWidth = GPU_FRAMEBUFFER_NATIVE_WIDTH; this->_engineSub->renderedHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT; this->_engineSub->renderedBuffer = this->_engineSub->nativeBuffer; @@ -7316,11 +7052,8 @@ switch 
(vramConfiguration.banks[i].purpose) { case VramConfiguration::ABG: - this->_engineMain->UpdateVRAM3DUsageProperties_BGLayer(i); - break; - case VramConfiguration::BBG: - this->_engineSub->UpdateVRAM3DUsageProperties_BGLayer(i); + case VramConfiguration::LCDC: break; case VramConfiguration::AOBJ: @@ -7331,9 +7064,6 @@ this->_engineSub->UpdateVRAM3DUsageProperties_OBJLayer(i); break; - case VramConfiguration::LCDC: - break; - default: { this->_engineMain->nativeLineCaptureCount[i] = GPU_VRAM_BLOCK_LINES; @@ -7345,13 +7075,6 @@ } } } - - this->_engineMain->isCustomRenderingNeeded = (this->_engineMain->WillRender3DLayer() && !CurrentRenderer->IsFramebufferNativeSize()) || - (this->_engineMain->vramBlockBGIndex != VRAM_NO_3D_USAGE) || - (this->_engineMain->vramBlockOBJIndex != VRAM_NO_3D_USAGE); - - this->_engineSub->isCustomRenderingNeeded = (this->_engineSub->vramBlockBGIndex != VRAM_NO_3D_USAGE) || - (this->_engineSub->vramBlockOBJIndex != VRAM_NO_3D_USAGE); } const NDSDisplayInfo& GPUSubsystem::GetDisplayInfo() Modified: trunk/desmume/src/GPU.h =================================================================== --- trunk/desmume/src/GPU.h 2016-07-13 23:14:38 UTC (rev 5504) +++ trunk/desmume/src/GPU.h 2016-07-14 05:49:47 UTC (rev 5505) @@ -1324,10 +1324,11 @@ template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _LineRot(void *__restrict dstColorLine, const u16 lineIndex); template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void* _LineExtRot(void *__restrict dstColorLine, const u16 lineIndex, bool &outUseCustomVRAM); - template <GPULayerID LAYERID> void _RenderPixel_CheckWindows(const size_t srcX, bool &didPassWindowTest, bool &enableColorEffect) const; + template<GPULayerID LAYERID> void _RenderPixel_CheckWindows(const size_t srcX, bool &didPassWindowTest, bool 
&enableColorEffect) const; void _RenderLine_Clear(const u16 clearColor, const u16 l, void *renderLineTarget); - void* _RenderLine_Layers(const u16 l); + void _RenderLine_SetupSprites(const u16 backdropColor, const u16 lineIndex); + template<NDSColorFormat OUTPUTFORMAT> void* _RenderLine_Layers(const u16 l); void _HandleDisplayModeOff(const size_t l); void _HandleDisplayModeNormal(const size_t l); @@ -1340,6 +1341,8 @@ template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void* _RenderLine_LayerBG_ApplyNoWindowsEnabledHint(void *dstColorLine, const u16 lineIndex); template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void* _RenderLine_LayerBG_ApplyMosaic(void *dstColorLine, const u16 lineIndex); template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool ISCUSTOMRENDERINGNEEDED> void* _RenderLine_LayerBG(void *dstColorLine, const u16 lineIndex); + + template<NDSColorFormat OUTPUTFORMAT> void* _RenderLine_LayerOBJ(itemsForPriority_t *__restrict item, void *__restrict dstColorLine, const u16 lineIndex); template<NDSColorFormat OUTPUTFORMAT, GPULayerID LAYERID, bool ISDEBUGRENDER, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT> FORCEINLINE void _RenderPixel(const size_t srcX, const u16 src, const u8 srcAlpha, void *__restrict dstColorLine, u8 *__restrict dstLayerIDLine); FORCEINLINE void _RenderPixel3D(const FragmentColor src, u16 &dstColor, u8 &dstLayerID, bool enableColorEffect); @@ -1408,9 +1411,6 @@ void UpdatePropertiesWithoutRender(const u16 l); void FramebufferPostprocess(); - bool isCustomRenderingNeeded; - u8 vramBGLayer; - u8 vramBlockBGIndex; u8 vramBlockOBJIndex; size_t nativeLineRenderCount; @@ -1443,7 +1443,6 @@ const BGLayerInfo& GetBGLayerInfoByID(const GPULayerID layerID); - void UpdateVRAM3DUsageProperties_BGLayer(const size_t bankIndex); void UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex); void SpriteRenderDebug(const u16 lineIndex, 
u16 *dst); @@ -1482,7 +1481,6 @@ template<GPULayerID LAYERID, bool ISDEBUGRENDER, bool MOSAIC, bool NOWINDOWSENABLEDHINT, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _LineLarge8bpp(u16 *__restrict dstColorLine, const u16 lineIndex); - void* _RenderLine_Layers(const u16 l); template<size_t CAPTURELENGTH> void _RenderLine_DisplayCapture(const void *renderedLineSrcA, const u16 l); void _RenderLine_DispCapture_FIFOToBuffer(u16 *fifoLineBuffer); @@ -1526,6 +1524,8 @@ virtual void Reset(); virtual void RenderLine(const u16 l); + + template<NDSColorFormat OUTPUTFORMAT> void* RenderLine_Layer3D(void *dstColorLine, const u16 lineIndex); }; class GPUEngineB : public GPUEngineBase @@ -1534,9 +1534,6 @@ GPUEngineB(); ~GPUEngineB(); -protected: - void* _RenderLine_Layers(const u16 l); - public: static GPUEngineB* Allocate(); void FinalizeAndDeallocate(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-07-13 23:14:40
|
Revision: 5504 http://sourceforge.net/p/desmume/code/5504 Author: rogerman Date: 2016-07-13 23:14:38 +0000 (Wed, 13 Jul 2016) Log Message: ----------- GPU: - Do some minor code cleanup. Modified Paths: -------------- trunk/desmume/src/GPU.cpp trunk/desmume/src/rasterize.h Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-07-13 23:13:14 UTC (rev 5503) +++ trunk/desmume/src/GPU.cpp 2016-07-13 23:14:38 UTC (rev 5504) @@ -7687,23 +7687,29 @@ { this->UpdateRenderProperties(); - if (!CommonSettings.showGpu.main) + if (!isFramebufferRenderNeeded[GPUEngineID_Main]) { - memset(this->_engineMain->renderedBuffer, 0, this->_engineMain->renderedWidth * this->_engineMain->renderedHeight * this->_displayInfo.pixelBytes); + if (!CommonSettings.showGpu.main) + { + memset(this->_engineMain->renderedBuffer, 0, this->_engineMain->renderedWidth * this->_engineMain->renderedHeight * this->_displayInfo.pixelBytes); + } + else if (this->_engineMain->GetIsMasterBrightFullIntensity()) + { + this->_engineMain->ApplyMasterBrightness<true>(); + } } - else if (this->_engineMain->GetIsMasterBrightFullIntensity() && (this->_engineMain->GetIORegisterMap().DISPCAPCNT.CaptureEnable == 0)) - { - this->_engineMain->ApplyMasterBrightness<true>(); - } - if (!CommonSettings.showGpu.sub) + if (!isFramebufferRenderNeeded[GPUEngineID_Sub]) { - memset(this->_engineSub->renderedBuffer, 0, this->_engineSub->renderedWidth * this->_engineSub->renderedHeight * this->_displayInfo.pixelBytes); + if (!CommonSettings.showGpu.sub) + { + memset(this->_engineSub->renderedBuffer, 0, this->_engineSub->renderedWidth * this->_engineSub->renderedHeight * this->_displayInfo.pixelBytes); + } + else if (this->_engineSub->GetIsMasterBrightFullIntensity()) + { + this->_engineSub->ApplyMasterBrightness<true>(); + } } - else if (this->_engineSub->GetIsMasterBrightFullIntensity()) - { - this->_engineSub->ApplyMasterBrightness<true>(); - } } } @@ 
-7718,15 +7724,19 @@ // originates from the top of the screen, the BG0 layer will only be enabled at line 46. This // means that we need to check the states at that particular time to ensure that the 3D renderer // finishes before we read the 3D framebuffer. Otherwise, the map will render incorrectly. - const bool need3DDisplayFramebuffer = this->_engineMain->WillRender3DLayer(); - const bool need3DCaptureFramebuffer = this->_engineMain->WillCapture3DLayerDirect(); - - if ( CurrentRenderer->GetRenderNeedsFinish() && (need3DDisplayFramebuffer || need3DCaptureFramebuffer) ) + + if (CurrentRenderer->GetRenderNeedsFinish()) { - CurrentRenderer->SetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer); - CurrentRenderer->RenderFinish(); - CurrentRenderer->SetRenderNeedsFinish(false); - this->_event->DidRender3DEnd(); + const bool need3DDisplayFramebuffer = this->_engineMain->WillRender3DLayer(); + const bool need3DCaptureFramebuffer = this->_engineMain->WillCapture3DLayerDirect(); + + if (need3DDisplayFramebuffer || need3DCaptureFramebuffer) + { + CurrentRenderer->SetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer); + CurrentRenderer->RenderFinish(); + CurrentRenderer->SetRenderNeedsFinish(false); + this->_event->DidRender3DEnd(); + } } this->_engineMain->RenderLine(l); Modified: trunk/desmume/src/rasterize.h =================================================================== --- trunk/desmume/src/rasterize.h 2016-07-13 23:13:14 UTC (rev 5503) +++ trunk/desmume/src/rasterize.h 2016-07-13 23:14:38 UTC (rev 5504) @@ -57,7 +57,7 @@ // SoftRasterizer-specific methods virtual Render3DError InitTables(); - template<bool useHiResInterpolate> size_t performClipping(const VERTLIST *vertList, const POLYLIST *polyList, const INDEXLIST *indexList); + template<bool USEHIRESINTERPOLATE> size_t performClipping(const VERTLIST *vertList, const POLYLIST *polyList, const INDEXLIST *indexList); // Base rendering methods virtual Render3DError 
BeginRender(const GFX3D &engine); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |