[Hgengine-cvs] SF.net SVN: hgengine:[733] Mercury2/src

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 733
          http://hgengine.svn.sourceforge.net/hgengine/?rev=733&view=rev
Author:   axlecrusher
Date:     2010-05-17 23:58:27 +0000 (Mon, 17 May 2010)

Log Message:
-----------
Fix SSE on Windows. We can't rely on the stack to align variables properly, so we need to do it ourselves.

Modified Paths:
--------------
    Mercury2/src/AlignedBuffer.h
    Mercury2/src/MercuryMath.cpp
    Mercury2/src/MercuryMatrix.cpp
    Mercury2/src/MercuryMatrix.h

Modified: Mercury2/src/AlignedBuffer.h
===================================================================

--- Mercury2/src/AlignedBuffer.h	2010-05-16 15:28:56 UTC (rev 732)
+++ Mercury2/src/AlignedBuffer.h	2010-05-17 23:58:27 UTC (rev 733)
@@ -18,7 +18,10 @@
 		
 		void Allocate(unsigned long count, uint8_t alignment = 32)
 		{
-			SAFE_FREE(m_mem);
+			if (m_mem!=0)
+				free(m_mem);
+			m_mem=0;
+
 			void * m_memret;
 			m_data = (T*)mmemalign(alignment, sizeof(T)*count, m_memret);
 			m_mem = (T*)m_memret;
@@ -27,7 +30,8 @@
 		
 		void Free()
 		{
-			SAFE_FREE(m_mem);
+			if (m_mem!=0)
+				free(m_mem);
 			m_data = NULL;
 			m_length = 0;
 		}

Modified: Mercury2/src/MercuryMath.cpp
===================================================================
--- Mercury2/src/MercuryMath.cpp	2010-05-16 15:28:56 UTC (rev 732)
+++ Mercury2/src/MercuryMath.cpp	2010-05-17 23:58:27 UTC (rev 733)
@@ -320,13 +320,15 @@
 	_mm_store_ps( f, r );
 }
 */
+
 void MMCrossProduct( const FloatRow& r1, const FloatRow& r2, FloatRow& result)
 {
 	__m128 a,b,c,d,r;//using more registers is faster
 	__m128 t1,t2;
 	
-	t1 = _mm_load_ps(r1);
-	t2 = _mm_load_ps(r2);
+	//unaligned load, vectors are not aligned
+	t1 = _mm_loadu_ps(r1);
+	t2 = _mm_loadu_ps(r2);
 
 	a = _mm_shuffle_ps(t1, t1, 0xc9);
 	b = _mm_shuffle_ps(t2, t2, 0xd2);
@@ -337,7 +339,7 @@
 	a = _mm_mul_ps( c, d );
 	r = _mm_sub_ps(r,a);
 
-	_mm_store_ps(result, r);
+	_mm_storeu_ps(result, r);
 }
 
 #endif

Modified: Mercury2/src/MercuryMatrix.cpp
===================================================================
--- Mercury2/src/MercuryMatrix.cpp	2010-05-16 15:28:56 UTC (rev 732)
+++ Mercury2/src/MercuryMatrix.cpp	2010-05-17 23:58:27 UTC (rev 733)
@@ -1,6 +1,11 @@
 #include "MercuryMatrix.h"
 #include <MercuryLog.h>
 
+MercuryMatrixMemory::MercuryMatrixMemory()
+{
+	m_data.Allocate(rows,16);
+}
+
 MercuryMatrixMemory& MercuryMatrixMemory::GetInstance()
 {
 	static MercuryMatrixMemory* mmm = NULL;
@@ -19,7 +24,7 @@
 	MSemaphoreLock lock(&m_lock);
 
 	for (unsigned int i = 0; i < rows;i++)
-		m_free.push_back( m_data+i );
+		m_free.push_back( m_data.Buffer()+i );
 }
 
 FloatRow* MercuryMatrixMemory::GetNewMatrix()

Modified: Mercury2/src/MercuryMatrix.h
===================================================================
--- Mercury2/src/MercuryMatrix.h	2010-05-16 15:28:56 UTC (rev 732)
+++ Mercury2/src/MercuryMatrix.h	2010-05-17 23:58:27 UTC (rev 733)
@@ -10,6 +10,8 @@
 #include <list>
 #include <MSemaphore.h>
 
+#include <AlignedBuffer.h>
+
 ///Memory holder for matrices
 class MercuryMatrixMemory
 {
@@ -17,15 +19,16 @@
 	to try to take advantage of data prefetching. Some matrix data should get a
 	free ride into the CPU cache. */
 	public:
+		MercuryMatrixMemory();
 		void Init();
 		static MercuryMatrixMemory& GetInstance();
 		FloatRow* GetNewMatrix();
 		void FreeMatrix(FloatRow* m);
 	private:
+		static const unsigned int rows = 1024; //1024 matrices * 64bytes each = 64kb
 		typedef FloatRow MatrixArray[4]; //64kb
-		static const unsigned int rows = 1024; //1024 matrices * 64bytes each = 64kb
+		AlignedBuffer<MatrixArray> m_data;
 		std::list< MatrixArray* > m_free;
-		MatrixArray m_data[rows];
 		MSemaphore m_lock;
 };
 


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.