|
From: <axl...@us...> - 2009-01-04 18:07:45
|
Revision: 145
http://hgengine.svn.sourceforge.net/hgengine/?rev=145&view=rev
Author: axlecrusher
Date: 2009-01-04 18:07:42 +0000 (Sun, 04 Jan 2009)
Log Message:
-----------
improve the vector multiply
Modified Paths:
--------------
Mercury2/src/MercuryMath.cpp
Mercury2/src/MercuryMath.h
Modified: Mercury2/src/MercuryMath.cpp
===================================================================
--- Mercury2/src/MercuryMath.cpp 2009-01-04 17:50:45 UTC (rev 144)
+++ Mercury2/src/MercuryMath.cpp 2009-01-04 18:07:42 UTC (rev 145)
@@ -130,8 +130,11 @@
in1[14] * in2[11] + in1[15] * in2[15];
}
-void VectorMultiply4f( const float * m, float *p, float *out )
+void VectorMultiply4f( const FloatRow* matrix, const FloatRow* pa, FloatRow* outa )
{
+ float *m = (float*)matrix;
+ float *p = (float*)pa;
+ float *out = (float*)outa;
out[0] = p[0] * m[0] + p[1] * m[1] + p[2] * m[2] + p[3] * m[3];
out[1] = p[0] * m[4] + p[1] * m[5] + p[2] * m[6] + p[3] * m[7];
out[2] = p[0] * m[8] + p[1] * m[9] + p[2] * m[10] + p[3] * m[11];
@@ -231,35 +234,25 @@
//This is an SSE matrix vector multiply, see the standard C++ code
//for a clear algorithm. This seems like it works.
-void VectorMultiply4f( const float * m, float *p, float *out )
+void VectorMultiply4f( const FloatRow* matrix, const FloatRow* p, FloatRow* out )
{
- __m128 xmm[5], outxmm[2], tmp;
-
- xmm[0] = _mm_load_ps((float*)p); //the vector
-
- //store the matrix
- xmm[1] = _mm_load_ps((float*)&(m[0]));
- xmm[2] = _mm_load_ps((float*)&(m[4]));
- xmm[3] = _mm_load_ps((float*)&(m[8]));
- xmm[4] = _mm_load_ps((float*)&(m[12]));
-
+ __m128 tmp;
+
//compute term 1 and term 2 and store them in the low order
//of outxmm[0]
- outxmm[0] = Hadd4( _mm_mul_ps( xmm[1], xmm[0] ) );
- tmp = Hadd4( _mm_mul_ps( xmm[2], xmm[0] ) );
- outxmm[0] = _mm_unpacklo_ps(outxmm[0], tmp);
+ out[0] = Hadd4( _mm_mul_ps( matrix[1], *p ) );
+ tmp = Hadd4( _mm_mul_ps( matrix[2], *p ) );
+ out[0] = _mm_unpacklo_ps(out[0], tmp);
//compute term 3 and term 4 and store them in the high order
//of outxmm[1]
- outxmm[1] = Hadd4( _mm_mul_ps( xmm[3], xmm[0] ) );
- tmp = Hadd4( _mm_mul_ps( xmm[4], xmm[0] ) );
- outxmm[1] = _mm_unpacklo_ps(outxmm[1], tmp);
+ out[1] = Hadd4( _mm_mul_ps( matrix[3], *p ) );
+ tmp = Hadd4( _mm_mul_ps( matrix[4], *p ) );
+ out[1] = _mm_unpacklo_ps(out[1], tmp);
//shuffle the low order of outxmm[0] into the loworder of tmp
//and shuffle the low order of outxmm[1] into the high order of tmp
- tmp = _mm_movelh_ps(outxmm[0], outxmm[1]);
-
- _mm_store_ps(out, tmp);
+ *out = _mm_movelh_ps(out[0], out[1]);
}
void ZeroFloatRow(FloatRow& r)
Modified: Mercury2/src/MercuryMath.h
===================================================================
--- Mercury2/src/MercuryMath.h 2009-01-04 17:50:45 UTC (rev 144)
+++ Mercury2/src/MercuryMath.h 2009-01-04 18:07:42 UTC (rev 145)
@@ -52,7 +52,7 @@
void Copy8f( void * dest, const void * source );
void Copy16f( void * dest, const void * source );
void MatrixMultiply4f ( const FloatRow* in1, const FloatRow* in2, FloatRow* out );
-void VectorMultiply4f(const float *m, float *p, float *out );
+void VectorMultiply4f(const FloatRow* matrix, const FloatRow* p, FloatRow* out );
#endif
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|