[Hgengine-cvs] SF.net SVN: hgengine:[143] Mercury2/src

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 143
          http://hgengine.svn.sourceforge.net/hgengine/?rev=143&view=rev
Author:   axlecrusher
Date:     2009-01-04 16:37:53 +0000 (Sun, 04 Jan 2009)

Log Message:
-----------
First stage of overhauling mercury math to squeeze out a little more speed.
Use our own FloatRow to define rows for matrices instead of float arrays.
We can define these differently if we are using SSE or not.
So far this makes matrix multiplies about 14% faster.

Modified Paths:
--------------
    Mercury2/src/MercuryMath.cpp
    Mercury2/src/MercuryMath.h
    Mercury2/src/MercuryMatrix.cpp
    Mercury2/src/MercuryMatrix.h

Modified: Mercury2/src/MercuryMath.cpp
===================================================================

--- Mercury2/src/MercuryMath.cpp	2009-01-03 17:50:45 UTC (rev 142)
+++ Mercury2/src/MercuryMath.cpp	2009-01-04 16:37:53 UTC (rev 143)
@@ -4,6 +4,11 @@
 
 //Generic Math functions. Compile these if you can not use optimized functions.
 
+void ZeroFloatRow(FloatRow& r)
+{
+	Copy4f(&r, (FloatRow){ 0.0f, 0.0f, 0.0f, 0.0f });
+}
+
 void Mul4f(const float* first, const float* second, float* out)
 {
     out[0] = first[0] * second[0];
@@ -80,8 +85,14 @@
     ((float*)dest)[15] = ((float*)source)[15];
 }
 
-void R_ConcatTransforms4 ( const float* in1, const float* in2, float* out)
+void R_ConcatTransforms4 ( const FloatRow* in1a, const FloatRow* in2a, FloatRow* outa)
 {
+	float *in1, *in2, *out;
+	
+	in1 = (float*)in1a;
+	in2 = (float*)in2a;
+	out = (float*)outa;
+	
 	out[0] = in1[0] * in2[0] + in1[1] * in2[4] +
 				in1[2] * in2[8] + in1[3] * in2[12];
 	out[1] = in1[0] * in2[1] + in1[1] * in2[5] +
@@ -128,9 +139,8 @@
 }
 
 #else
-#include <xmmintrin.h>
 
-inline __m128 Hadd4(__m128 x);
+//inline __m128 Hadd4(__m128 x);
 __m128 Hadd4(__m128 x)
 {
 	//add the low and high components of x
@@ -225,33 +235,27 @@
 	_mm_store_ps((float*)&(((float*)dest)[12]), xmm[3]);
 }
 
-void R_ConcatTransforms4 ( const float* in1, const float* in2, float* out)
+void R_ConcatTransforms4 ( const FloatRow* in1, const FloatRow* in2, FloatRow* out)
 {
-	unsigned int x, y;
-	__m128 xmm[8];
-
-	xmm[1] = _mm_load_ps((float*)&(in2[0]));
-	xmm[3] = _mm_load_ps((float*)&(in2[4]));
-	xmm[5] = _mm_load_ps((float*)&(in2[8]));
-	xmm[7] = _mm_load_ps((float*)&(in2[12]));
-
+	unsigned int y;
+	__m128 xmm[4];
+	
 	for (y = 0; y < 4; ++y)
 	{
-		xmm[0] = _mm_set_ps1(in1[(y*4)+0]);
-		xmm[2] = _mm_set_ps1(in1[(y*4)+1]);
-		xmm[4] = _mm_set_ps1(in1[(y*4)+2]);
-		xmm[6] = _mm_set_ps1(in1[(y*4)+3]);
+		//load columns
+		xmm[3] = _mm_shuffle_ps (in1[y], in1[y], 0xff);
+		xmm[2] = _mm_shuffle_ps (in1[y], in1[y], 0xaa);
+		xmm[1] = _mm_shuffle_ps (in1[y], in1[y], 0x55);
+		xmm[0] = _mm_shuffle_ps (in1[y], in1[y], 0x00);
 
-		xmm[0] = _mm_mul_ps( xmm[0], xmm[1] );
-		xmm[2] = _mm_mul_ps( xmm[2], xmm[3] );
-		xmm[4] = _mm_mul_ps( xmm[4], xmm[5] );
-		xmm[6] = _mm_mul_ps( xmm[6], xmm[7] );
+		xmm[0] = _mm_mul_ps( xmm[0], in2[0] );
+		xmm[1] = _mm_mul_ps( xmm[1], in2[1] );
+		xmm[2] = _mm_mul_ps( xmm[2], in2[2] );
+		xmm[3] = _mm_mul_ps( xmm[3], in2[3] );
 
-		xmm[0] = _mm_add_ps( xmm[0], xmm[2] );
-		xmm[4] = _mm_add_ps( xmm[4], xmm[6] );
-		xmm[0] = _mm_add_ps( xmm[0], xmm[4] );
-
-		_mm_store_ps(&(out[(y*4)]), xmm[0]);
+		xmm[0] = _mm_add_ps( xmm[0], xmm[1] );
+		xmm[2] = _mm_add_ps( xmm[2], xmm[3] );
+		out[y] = _mm_add_ps( xmm[0], xmm[2] );
 	}
 }
 
@@ -288,6 +292,11 @@
 	_mm_store_ps(out, tmp);
 }
 
+void ZeroFloatRow(FloatRow& r)
+{
+	r = (FloatRow)_mm_setzero_ps();
+}
+
 #endif
 
 /*

Modified: Mercury2/src/MercuryMath.h
===================================================================
--- Mercury2/src/MercuryMath.h	2009-01-03 17:50:45 UTC (rev 142)
+++ Mercury2/src/MercuryMath.h	2009-01-04 16:37:53 UTC (rev 143)
@@ -3,6 +3,15 @@
 
 #include <math.h>
 
+#ifdef USE_SSE
+#include <xmmintrin.h>
+typedef __m128 FloatRow __attribute__((aligned(16)));
+#else
+typedef float FloatRow[4];
+#endif
+
+void ZeroFloatRow(FloatRow& r);
+
 #define DEGRAD	0.01745329251994329576f		//degree to radian
 #define RADDEG	57.2957795130823208767f		//radian to degree
 #define	Q_PI	3.14159265358979323846f
@@ -42,7 +51,8 @@
 void Copy4f( void * dest, const void * source );
 void Copy8f( void * dest, const void * source );
 void Copy16f( void * dest, const void * source );
-void R_ConcatTransforms4 ( const float* in1, const float* in2, float* out );
+//void R_ConcatTransforms4 ( const float* in1, const float* in2, float* out );
+void R_ConcatTransforms4 ( const FloatRow* in1, const FloatRow* in2, FloatRow* out );
 void VectorMultiply4f(const float *m, float *p, float *out );
 
 #endif

Modified: Mercury2/src/MercuryMatrix.cpp
===================================================================
--- Mercury2/src/MercuryMatrix.cpp	2009-01-03 17:50:45 UTC (rev 142)
+++ Mercury2/src/MercuryMatrix.cpp	2009-01-04 16:37:53 UTC (rev 143)
@@ -25,56 +25,47 @@
 
 void MercuryMatrix::Zero()
 {
-	m_matrix[0][0] = 0;
-	m_matrix[0][1] = 0;
-	m_matrix[0][2] = 0;
-	m_matrix[0][3] = 0;
-	
-	m_matrix[1][0] = 0;
-	m_matrix[1][1] = 0;
-	m_matrix[1][2] = 0;
-	m_matrix[1][3] = 0;
-	
-	m_matrix[2][0] = 0;
-	m_matrix[2][1] = 0;
-	m_matrix[2][2] = 0;
-	m_matrix[2][3] = 0;
-	
-	m_matrix[3][0] = 0;
-	m_matrix[3][1] = 0;
-	m_matrix[3][2] = 0;
-	m_matrix[3][3] = 0;
+	ZeroFloatRow( m_matrix[0] );
+	ZeroFloatRow( m_matrix[1] );
+	ZeroFloatRow( m_matrix[2] );
+	ZeroFloatRow( m_matrix[3] );
 }
 
 void MercuryMatrix::Identity()
 {
-	m_matrix[0][0] = 1;
-	m_matrix[0][1] = 0;
-	m_matrix[0][2] = 0;
-	m_matrix[0][3] = 0;
+	Copy4f(&m_matrix[0], (void*)&((FloatRow){ 1.0f, 0.0f, 0.0f, 0.0f }));
+	Copy4f(&m_matrix[1], (void*)&((FloatRow){ 0.0f, 1.0f, 0.0f, 0.0f }));
+	Copy4f(&m_matrix[2], (void*)&((FloatRow){ 0.0f, 0.0f, 1.0f, 0.0f }));
+	Copy4f(&m_matrix[3], (void*)&((FloatRow){ 0.0f, 0.0f, 0.0f, 1.0f }));
+/*
+	(*this)[0][0] = 1;
+	(*this)[0][1] = 0;
+	(*this)[0][2] = 0;
+	(*this)[0][3] = 0;
 	
-	m_matrix[1][0] = 0;
-	m_matrix[1][1] = 1;
-	m_matrix[1][2] = 0;
-	m_matrix[1][3] = 0;
+	(*this)[1][0] = 0;
+	(*this)[1][1] = 1;
+	(*this)[1][2] = 0;
+	(*this)[1][3] = 0;
 	
-	m_matrix[2][0] = 0;
-	m_matrix[2][1] = 0;
-	m_matrix[2][2] = 1;
-	m_matrix[2][3] = 0;
+	(*this)[2][0] = 0;
+	(*this)[2][1] = 0;
+	(*this)[2][2] = 1;
+	(*this)[2][3] = 0;
 	
-	m_matrix[3][0] = 0;
-	m_matrix[3][1] = 0;
-	m_matrix[3][2] = 0;
-	m_matrix[3][3] = 1;
+	(*this)[3][0] = 0;
+	(*this)[3][1] = 0;
+	(*this)[3][2] = 0;
+	(*this)[3][3] = 1;
+	*/
 }
 
 void MercuryMatrix::Translate(float x, float y, float z)
 {
 	MercuryMatrix m;
-	m.m_matrix[0][3] = x;
-	m.m_matrix[1][3] = y;
-	m.m_matrix[2][3] = z;
+	m[0][3] = x;
+	m[1][3] = y;
+	m[2][3] = z;
 	*this *= m;
 }
 
@@ -95,25 +86,25 @@
 
 	//Row major
 	//manually transposed
-	matrix.m_matrix[0][0] = cy*cz;
-	matrix.m_matrix[1][0] = (sx*sy*cz)-(cx*sz);
-	matrix.m_matrix[2][0] = (cx*sy*cz)+(sx*sz);
-	matrix.m_matrix[3][0] = 0;
+	matrix[0][0] = cy*cz;
+	matrix[1][0] = (sx*sy*cz)-(cx*sz);
+	matrix[2][0] = (cx*sy*cz)+(sx*sz);
+	matrix[3][0] = 0;
 
-	matrix.m_matrix[0][1] = cy*sz;
-	matrix.m_matrix[1][1] = (sx*sy*sz)+(cx*cz);
-	matrix.m_matrix[2][1] = (cx*sy*sz)-(sx*cz);
-	matrix.m_matrix[3][1] = 0;
+	matrix[0][1] = cy*sz;
+	matrix[1][1] = (sx*sy*sz)+(cx*cz);
+	matrix[2][1] = (cx*sy*sz)-(sx*cz);
+	matrix[3][1] = 0;
 
-	matrix.m_matrix[0][2] = -sy;
-	matrix.m_matrix[1][2] = sx*cy;
-	matrix.m_matrix[2][2] = cx*cy;
-	matrix.m_matrix[3][2] = 0;
+	matrix[0][2] = -sy;
+	matrix[1][2] = sx*cy;
+	matrix[2][2] = cx*cy;
+	matrix[3][2] = 0;
 
-	matrix.m_matrix[0][3] = 0;
-	matrix.m_matrix[1][3] = 0;
-	matrix.m_matrix[2][3] = 0;
-	matrix.m_matrix[3][3] = 1;
+	matrix[0][3] = 0;
+	matrix[1][3] = 0;
+	matrix[2][3] = 0;
+	matrix[3][3] = 1;
 
 	*this *= matrix;	
 }
@@ -127,25 +118,25 @@
 	float y = iy/absin;
 	float z = iz/absin;
 
-	m_matrix[0][0] = x*x*(1-c)+c;
-	m_matrix[0][1] = x*y*(1-c)-z*s;
-	m_matrix[0][2] = x*z*(1-c)+y*s;
-	m_matrix[0][3] = 0;
+	(*this)[0][0] = x*x*(1-c)+c;
+	(*this)[0][1] = x*y*(1-c)-z*s;
+	(*this)[0][2] = x*z*(1-c)+y*s;
+	(*this)[0][3] = 0;
 
-	m_matrix[1][0] = y*x*(1-c)+z*s;
-	m_matrix[1][1] = y*y*(1-c)+c;
-	m_matrix[1][2] = y*z*(1-c)-x*s;
-	m_matrix[1][3] = 0;
+	(*this)[1][0] = y*x*(1-c)+z*s;
+	(*this)[1][1] = y*y*(1-c)+c;
+	(*this)[1][2] = y*z*(1-c)-x*s;
+	(*this)[1][3] = 0;
 
-	m_matrix[2][0] = x*z*(1-c)-y*s;
-	m_matrix[2][1] = y*z*(1-c)+x*s;
-	m_matrix[2][2] = z*z*(1-c)+c;
-	m_matrix[2][3] = 0;
+	(*this)[2][0] = x*z*(1-c)-y*s;
+	(*this)[2][1] = y*z*(1-c)+x*s;
+	(*this)[2][2] = z*z*(1-c)+c;
+	(*this)[2][3] = 0;
 
-	m_matrix[3][0] = 0;
-	m_matrix[3][1] = 0;
-	m_matrix[3][2] = 0;
-	m_matrix[3][3] = 1;
+	(*this)[3][0] = 0;
+	(*this)[3][1] = 0;
+	(*this)[3][2] = 0;
+	(*this)[3][3] = 1;
 }
 
 void MercuryMatrix::Transotale( float tX, float tY, float tZ, float rX, float rY, float rZ, float sX, float sY, float sZ )
@@ -165,25 +156,25 @@
 
 	//Row major
 	//manually transposed
-	matrix.m_matrix[0][0] = sX*cy*cz;
-	matrix.m_matrix[1][0] = sX*((sx*sy*cz)-(cx*sz));
-	matrix.m_matrix[2][0] = sX*((cx*sy*cz)+(sx*sz));
-	matrix.m_matrix[3][0] = 0;
+	matrix[0][0] = sX*cy*cz;
+	matrix[1][0] = sX*((sx*sy*cz)-(cx*sz));
+	matrix[2][0] = sX*((cx*sy*cz)+(sx*sz));
+	matrix[3][0] = 0;
 
-	matrix.m_matrix[0][1] = sY*cy*sz;
-	matrix.m_matrix[1][1] = sY*((sx*sy*sz)+(cx*cz));
-	matrix.m_matrix[2][1] = sY*((cx*sy*sz)-(sx*cz));
-	matrix.m_matrix[3][1] = 0;
+	matrix[0][1] = sY*cy*sz;
+	matrix[1][1] = sY*((sx*sy*sz)+(cx*cz));
+	matrix[2][1] = sY*((cx*sy*sz)-(sx*cz));
+	matrix[3][1] = 0;
 
-	matrix.m_matrix[0][2] = sZ*(-sy);
-	matrix.m_matrix[1][2] = sZ*sx*cy;
-	matrix.m_matrix[2][2] = sZ*cx*cy;
-	matrix.m_matrix[3][2] = 0;
+	matrix[0][2] = sZ*(-sy);
+	matrix[1][2] = sZ*sx*cy;
+	matrix[2][2] = sZ*cx*cy;
+	matrix[3][2] = 0;
 
-	matrix.m_matrix[0][3] = tX;
-	matrix.m_matrix[1][3] = tY;
-	matrix.m_matrix[2][3] = tZ;
-	matrix.m_matrix[3][3] = 1;
+	matrix[0][3] = tX;
+	matrix[1][3] = tY;
+	matrix[2][3] = tZ;
+	matrix[3][3] = 1;
 
 	*this *= matrix;	
 }
@@ -193,9 +184,9 @@
 {
 	MercuryMatrix m;
 
-	m.m_matrix[0][0] = x;
-	m.m_matrix[1][1] = y;
-	m.m_matrix[2][2] = z;
+	m[0][0] = x;
+	m[1][1] = y;
+	m[2][2] = z;
 
 	*this *= m;
 }
@@ -203,14 +194,16 @@
 MercuryMatrix MercuryMatrix::operator*(const MercuryMatrix& m) const
 {
 	MercuryMatrix r(*this);
-	R_ConcatTransforms4 ( (float*)&m_matrix, (float*)&m.m_matrix, (float*)&r.m_matrix);
+//	R_ConcatTransforms4 ( (float*)&m_matrix, (float*)&m.m_matrix, (float*)&r.m_matrix);
+	R_ConcatTransforms4 ( m_matrix, m.m_matrix, r.m_matrix);
 	return r;
 }
 
 MercuryMatrix& MercuryMatrix::operator*=(const MercuryMatrix& m) 
 {
 	MercuryMatrix r(*this);
-	R_ConcatTransforms4 ( (float*)&r.m_matrix, (float*)&m.m_matrix, (float*)&m_matrix);
+//	R_ConcatTransforms4 ( (float*)&r.m_matrix, (float*)&m.m_matrix, (float*)&m_matrix);
+	R_ConcatTransforms4 ( r.m_matrix, m.m_matrix, m_matrix);
 	return *this;
 }
 
@@ -218,7 +211,7 @@
 {
 	for (int i = 0; i < 4; ++i)
 	{
-		printf( "%f %f %f %f\n", m_matrix[i][0], m_matrix[i][1], m_matrix[i][2], m_matrix[i][3] );
+		printf( "%f %f %f %f\n", (*this)[i][0], (*this)[i][1], (*this)[i][2], (*this)[i][3] );
 	}
 	printf("\n");
 }

Modified: Mercury2/src/MercuryMatrix.h
===================================================================
--- Mercury2/src/MercuryMatrix.h	2009-01-03 17:50:45 UTC (rev 142)
+++ Mercury2/src/MercuryMatrix.h	2009-01-04 16:37:53 UTC (rev 143)
@@ -15,13 +15,14 @@
 {
 private:
 	///[row][column] (The internal matrix)
-	float m_matrix[4][4];
+//	float m_matrix[4][4];
+	FloatRow m_matrix[4];
 public:
 	MercuryMatrix();
 	inline MercuryMatrix(const MercuryMatrix& m) { *this = m; }
-	inline float* operator[](unsigned int i) { return m_matrix[i]; }
+	inline float* operator[](unsigned int i) { return (float*)&m_matrix[i]; }
 	///Allow typecasting to float * for use in APIs
-	inline const float* operator[](unsigned int i) const { return m_matrix[i]; }
+	inline const float* operator[](unsigned int i) const { return (float*)&m_matrix[i]; }
 	const MercuryMatrix& operator=(const MercuryMatrix& m);
 	const MercuryMatrix& operator=(const float* m);
 	inline float* Ptr() { return (float*)&m_matrix; }


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.