|
From: <axl...@us...> - 2010-05-17 23:58:33
|
Revision: 733
http://hgengine.svn.sourceforge.net/hgengine/?rev=733&view=rev
Author: axlecrusher
Date: 2010-05-17 23:58:27 +0000 (Mon, 17 May 2010)
Log Message:
-----------
Fix SSE on Windows. We can't rely on the stack to align variables properly, so we need to do it ourselves.
Modified Paths:
--------------
Mercury2/src/AlignedBuffer.h
Mercury2/src/MercuryMath.cpp
Mercury2/src/MercuryMatrix.cpp
Mercury2/src/MercuryMatrix.h
Modified: Mercury2/src/AlignedBuffer.h
===================================================================
--- Mercury2/src/AlignedBuffer.h 2010-05-16 15:28:56 UTC (rev 732)
+++ Mercury2/src/AlignedBuffer.h 2010-05-17 23:58:27 UTC (rev 733)
@@ -18,7 +18,10 @@
void Allocate(unsigned long count, uint8_t alignment = 32)
{
- SAFE_FREE(m_mem);
+ if (m_mem!=0)
+ free(m_mem);
+ m_mem=0;
+
void * m_memret;
m_data = (T*)mmemalign(alignment, sizeof(T)*count, m_memret);
m_mem = (T*)m_memret;
@@ -27,7 +30,8 @@
void Free()
{
- SAFE_FREE(m_mem);
+ if (m_mem!=0)
+ free(m_mem);
m_data = NULL;
m_length = 0;
}
Modified: Mercury2/src/MercuryMath.cpp
===================================================================
--- Mercury2/src/MercuryMath.cpp 2010-05-16 15:28:56 UTC (rev 732)
+++ Mercury2/src/MercuryMath.cpp 2010-05-17 23:58:27 UTC (rev 733)
@@ -320,13 +320,15 @@
_mm_store_ps( f, r );
}
*/
+
void MMCrossProduct( const FloatRow& r1, const FloatRow& r2, FloatRow& result)
{
__m128 a,b,c,d,r;//using more registers is faster
__m128 t1,t2;
- t1 = _mm_load_ps(r1);
- t2 = _mm_load_ps(r2);
+ //unaligned load, vectors are not aligned
+ t1 = _mm_loadu_ps(r1);
+ t2 = _mm_loadu_ps(r2);
a = _mm_shuffle_ps(t1, t1, 0xc9);
b = _mm_shuffle_ps(t2, t2, 0xd2);
@@ -337,7 +339,7 @@
a = _mm_mul_ps( c, d );
r = _mm_sub_ps(r,a);
- _mm_store_ps(result, r);
+ _mm_storeu_ps(result, r);
}
#endif
Modified: Mercury2/src/MercuryMatrix.cpp
===================================================================
--- Mercury2/src/MercuryMatrix.cpp 2010-05-16 15:28:56 UTC (rev 732)
+++ Mercury2/src/MercuryMatrix.cpp 2010-05-17 23:58:27 UTC (rev 733)
@@ -1,6 +1,11 @@
#include "MercuryMatrix.h"
#include <MercuryLog.h>
+MercuryMatrixMemory::MercuryMatrixMemory()
+{
+ m_data.Allocate(rows,16);
+}
+
MercuryMatrixMemory& MercuryMatrixMemory::GetInstance()
{
static MercuryMatrixMemory* mmm = NULL;
@@ -19,7 +24,7 @@
MSemaphoreLock lock(&m_lock);
for (unsigned int i = 0; i < rows;i++)
- m_free.push_back( m_data+i );
+ m_free.push_back( m_data.Buffer()+i );
}
FloatRow* MercuryMatrixMemory::GetNewMatrix()
Modified: Mercury2/src/MercuryMatrix.h
===================================================================
--- Mercury2/src/MercuryMatrix.h 2010-05-16 15:28:56 UTC (rev 732)
+++ Mercury2/src/MercuryMatrix.h 2010-05-17 23:58:27 UTC (rev 733)
@@ -10,6 +10,8 @@
#include <list>
#include <MSemaphore.h>
+#include <AlignedBuffer.h>
+
///Memory holder for matrices
class MercuryMatrixMemory
{
@@ -17,15 +19,16 @@
to try to take advantage of data prefetching. Some matrix data should get a
free ride into the CPU cache. */
public:
+ MercuryMatrixMemory();
void Init();
static MercuryMatrixMemory& GetInstance();
FloatRow* GetNewMatrix();
void FreeMatrix(FloatRow* m);
private:
+ static const unsigned int rows = 1024; //1024 matrices * 64bytes each = 64kb
typedef FloatRow MatrixArray[4]; //64kb
- static const unsigned int rows = 1024; //1024 matrices * 64bytes each = 64kb
+ AlignedBuffer<MatrixArray> m_data;
std::list< MatrixArray* > m_free;
- MatrixArray m_data[rows];
MSemaphore m_lock;
};
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|