[Desmume-cvs] SF.net SVN: desmume:[5228] trunk/desmume/src

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Revision: 5228
          http://sourceforge.net/p/desmume/code/5228
Author:   rogerman
Date:     2015-07-27 05:06:53 +0000 (Mon, 27 Jul 2015)
Log Message:
-----------
Render3D:
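- Revert the SSE2 bit shift optimizations that were done in r5216: the whole-register byte shifts (_mm_slli_si128 / _mm_srli_si128) are replaced with per-element bit shifts (_mm_slli_epi32 / _mm_srli_epi32 / _mm_slli_epi16 / _mm_srli_epi16), and the alpha test now uses _mm_cmpeq_epi32 with _mm_andnot_si128 instead of the signed _mm_cmpgt_epi32. Fixes a regression related to fog, as well as a regression that caused a flickering problem in the title screen of Pokemon Ranger: Shadows of Almia. (Fixes bug #1487.)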

Revision Links:
--------------
    http://sourceforge.net/p/desmume/code/5216

Modified Paths:
--------------
    trunk/desmume/src/OGLRender.cpp
    trunk/desmume/src/render3D.cpp

Modified: trunk/desmume/src/OGLRender.cpp
===================================================================

--- trunk/desmume/src/OGLRender.cpp	2015-07-25 23:51:45 UTC (rev 5227)
+++ trunk/desmume/src/OGLRender.cpp	2015-07-27 05:06:53 UTC (rev 5228)
@@ -925,19 +925,19 @@
 			color = _mm_load_si128((__m128i *)(this->_framebufferColor + ir));
 			
 			__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x000000F8));	// Read from R
-			b = _mm_slli_si128(b, 7);										// Shift to B
+			b = _mm_slli_epi32(b, 7);										// Shift to B
 			
 			__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x0000F800));	// Read from G
-			g = _mm_srli_si128(g, 6);										// Shift in G
+			g = _mm_srli_epi32(g, 6);										// Shift in G
 			
 			__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x00F80000));	// Read from B
-			r = _mm_srli_si128(r, 19);										// Shift to R
+			r = _mm_srli_epi32(r, 19);										// Shift to R
 			
 			a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000));			// Read from A
-			a = _mm_cmpgt_epi32(a, _mm_set1_epi32(0x00000000));				// Determine A
-			a = _mm_and_si128(a, _mm_set1_epi32(0x00008000));				// Mask to A
+			a = _mm_cmpeq_epi32(a, _mm_setzero_si128());					// Determine A
+			a = _mm_andnot_si128(a, _mm_set1_epi32(0x00008000));			// Mask to A
 			
-			color = b | g | r | a;
+			color = _mm_or_si128(_mm_or_si128(_mm_or_si128(b, g), r), a);
 			
 			// All the colors are currently placed every other 16 bits, so we need to swizzle them
 			// to the lower 64 bits of our vector before we store them back to memory.

Modified: trunk/desmume/src/render3D.cpp
===================================================================
--- trunk/desmume/src/render3D.cpp	2015-07-25 23:51:45 UTC (rev 5227)
+++ trunk/desmume/src/render3D.cpp	2015-07-27 05:06:53 UTC (rev 5228)
@@ -493,16 +493,16 @@
 		
 		// Convert to RGBA5551
 		__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x0000003E));	// Read from R
-		r = _mm_srli_si128(r, 1);										// Shift to R
+		r = _mm_srli_epi32(r, 1);										// Shift to R
 		
 		__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x00003E00));	// Read from G
-		g = _mm_srli_si128(g, 4);										// Shift in G
+		g = _mm_srli_epi32(g, 4);										// Shift in G
 		
 		__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x003E0000));	// Read from B
-		b = _mm_srli_si128(b, 7);										// Shift to B
+		b = _mm_srli_epi32(b, 7);										// Shift to B
 		
 		__m128i a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000));	// Read from A
-		a = _mm_cmpgt_epi32(a, zero_vec128);							// Determine A
+		a = _mm_cmpeq_epi32(a, zero_vec128);							// Determine A
 		
 		// From here on, we're going to do an SSE2 trick to pack 32-bit down to unsigned
 		// 16-bit. Since SSE2 only has packssdw (signed 16-bit pack), then the alpha bit
@@ -514,9 +514,9 @@
 		// packssdw, then shift the bit back to its original position. Then we por the
 		// alpha vector with the post-packed color vector to get the final color.
 		
-		a = _mm_and_si128(a, _mm_set1_epi32(0x00004000));				// Mask out the bit before A
+		a = _mm_andnot_si128(a, _mm_set1_epi32(0x00004000));			// Mask out the bit before A
 		a = _mm_packs_epi32(a, zero_vec128);							// Pack 32-bit down to 16-bit
-		a = _mm_slli_si128(a, 1);										// Shift the A bit back to where it needs to be
+		a = _mm_slli_epi16(a, 1);										// Shift the A bit back to where it needs to be
 		
 		// Assemble the RGB colors, pack the 32-bit color into a signed 16-bit color, then por the alpha bit back in.
 		color = _mm_or_si128(_mm_or_si128(r, g), b);
@@ -607,8 +607,8 @@
 				clearDepthLo_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i));
 				clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, fogBufferBitMask_vec128);
 				clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, fogBufferBitMask_vec128);
-				clearDepthHi_vec128 = _mm_srli_si128(clearDepthHi_vec128, 15);
-				clearDepthLo_vec128 = _mm_srli_si128(clearDepthLo_vec128, 15);
+				clearDepthHi_vec128 = _mm_srli_epi16(clearDepthHi_vec128, 15);
+				clearDepthLo_vec128 = _mm_srli_epi16(clearDepthLo_vec128, 15);
 				
 				_mm_store_si128((__m128i *)(this->clearImageFogBuffer + i), _mm_packus_epi16(clearDepthLo_vec128, clearDepthHi_vec128));
 				

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.





[Desmume-cvs] SF.net SVN: desmume:[5228] trunk/desmume/src

DeSmuME is a Nintendo DS emulator

[Desmume-cvs] SF.net SVN: desmume:[5228] trunk/desmume/src