From: <axl...@us...> - 2010-05-17 23:58:33
|
Revision: 733 http://hgengine.svn.sourceforge.net/hgengine/?rev=733&view=rev Author: axlecrusher Date: 2010-05-17 23:58:27 +0000 (Mon, 17 May 2010) Log Message: ----------- Fix SSE on Windows. We can't rely on the stack to align variables properly, so we need to do it ourselves. Modified Paths: -------------- Mercury2/src/AlignedBuffer.h Mercury2/src/MercuryMath.cpp Mercury2/src/MercuryMatrix.cpp Mercury2/src/MercuryMatrix.h Modified: Mercury2/src/AlignedBuffer.h =================================================================== --- Mercury2/src/AlignedBuffer.h 2010-05-16 15:28:56 UTC (rev 732) +++ Mercury2/src/AlignedBuffer.h 2010-05-17 23:58:27 UTC (rev 733) @@ -18,7 +18,10 @@ void Allocate(unsigned long count, uint8_t alignment = 32) { - SAFE_FREE(m_mem); + if (m_mem!=0) + free(m_mem); + m_mem=0; + void * m_memret; m_data = (T*)mmemalign(alignment, sizeof(T)*count, m_memret); m_mem = (T*)m_memret; @@ -27,7 +30,8 @@ void Free() { - SAFE_FREE(m_mem); + if (m_mem!=0) + free(m_mem); m_data = NULL; m_length = 0; } Modified: Mercury2/src/MercuryMath.cpp =================================================================== --- Mercury2/src/MercuryMath.cpp 2010-05-16 15:28:56 UTC (rev 732) +++ Mercury2/src/MercuryMath.cpp 2010-05-17 23:58:27 UTC (rev 733) @@ -320,13 +320,15 @@ _mm_store_ps( f, r ); } */ + void MMCrossProduct( const FloatRow& r1, const FloatRow& r2, FloatRow& result) { __m128 a,b,c,d,r;//using more registers is faster __m128 t1,t2; - t1 = _mm_load_ps(r1); - t2 = _mm_load_ps(r2); + //unaligned load, vectors are not aligned + t1 = _mm_loadu_ps(r1); + t2 = _mm_loadu_ps(r2); a = _mm_shuffle_ps(t1, t1, 0xc9); b = _mm_shuffle_ps(t2, t2, 0xd2); @@ -337,7 +339,7 @@ a = _mm_mul_ps( c, d ); r = _mm_sub_ps(r,a); - _mm_store_ps(result, r); + _mm_storeu_ps(result, r); } #endif Modified: Mercury2/src/MercuryMatrix.cpp =================================================================== --- Mercury2/src/MercuryMatrix.cpp 2010-05-16 15:28:56 UTC (rev 732) +++ 
Mercury2/src/MercuryMatrix.cpp 2010-05-17 23:58:27 UTC (rev 733) @@ -1,6 +1,11 @@ #include "MercuryMatrix.h" #include <MercuryLog.h> +MercuryMatrixMemory::MercuryMatrixMemory() +{ + m_data.Allocate(rows,16); +} + MercuryMatrixMemory& MercuryMatrixMemory::GetInstance() { static MercuryMatrixMemory* mmm = NULL; @@ -19,7 +24,7 @@ MSemaphoreLock lock(&m_lock); for (unsigned int i = 0; i < rows;i++) - m_free.push_back( m_data+i ); + m_free.push_back( m_data.Buffer()+i ); } FloatRow* MercuryMatrixMemory::GetNewMatrix() Modified: Mercury2/src/MercuryMatrix.h =================================================================== --- Mercury2/src/MercuryMatrix.h 2010-05-16 15:28:56 UTC (rev 732) +++ Mercury2/src/MercuryMatrix.h 2010-05-17 23:58:27 UTC (rev 733) @@ -10,6 +10,8 @@ #include <list> #include <MSemaphore.h> +#include <AlignedBuffer.h> + ///Memory holder for matrices class MercuryMatrixMemory { @@ -17,15 +19,16 @@ to try to take advantage of data prefetching. Some matrix data should get a free ride into the CPU cache. */ public: + MercuryMatrixMemory(); void Init(); static MercuryMatrixMemory& GetInstance(); FloatRow* GetNewMatrix(); void FreeMatrix(FloatRow* m); private: + static const unsigned int rows = 1024; //1024 matrices * 64bytes each = 64kb typedef FloatRow MatrixArray[4]; //64kb - static const unsigned int rows = 1024; //1024 matrices * 64bytes each = 64kb + AlignedBuffer<MatrixArray> m_data; std::list< MatrixArray* > m_free; - MatrixArray m_data[rows]; MSemaphore m_lock; }; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |