From: <axl...@us...> - 2009-03-03 02:02:43
|
Revision: 168
          http://hgengine.svn.sourceforge.net/hgengine/?rev=168&view=rev
Author:   axlecrusher
Date:     2009-03-03 02:02:34 +0000 (Tue, 03 Mar 2009)

Log Message:
-----------
Fix broken SSE math, I have to make these math functions safer

Modified Paths:
--------------
    Mercury2/src/MercuryMath.cpp
    Mercury2/src/MercuryMath.h
    Mercury2/src/MercuryMatrix.cpp

Modified: Mercury2/src/MercuryMath.cpp
===================================================================
--- Mercury2/src/MercuryMath.cpp	2009-03-02 21:59:02 UTC (rev 167)
+++ Mercury2/src/MercuryMath.cpp	2009-03-03 02:02:34 UTC (rev 168)
@@ -278,25 +278,23 @@
 
 //This is an SSE matrix vector multiply, see the standard C++ code
 //for a clear algorithim. This seems like it works.
-void VectorMultiply4f( const FloatRow* matrix, const FloatRow* p, FloatRow* out )
+void VectorMultiply4f( const FloatRow* matrix, const FloatRow& p, FloatRow& out )
 {
-	__m128 tmp;
+	__m128 tmp, XY;
 
-	//compute term 1 and term 2 and store them in the low order
-	//of outxmm[0]
-	out[0] = Hadd4( _mm_mul_ps( matrix[0], *p ) );
-	tmp = Hadd4( _mm_mul_ps( matrix[1], *p ) );
-	out[0] = _mm_unpacklo_ps(out[0], tmp);
+	//compute term X and term Y and store them in the low order of XY
+	XY = Hadd4( _mm_mul_ps( matrix[0], p ) ); //compute X
+	tmp = Hadd4( _mm_mul_ps( matrix[1], p ) ); //compute Y
+	XY = _mm_unpacklo_ps(XY, tmp);
 
-	//compute term 3 and term 4 and store them in the high order
-	//of outxmm[1]
-	out[1] = Hadd4( _mm_mul_ps( matrix[2], *p ) );
-	tmp = Hadd4( _mm_mul_ps( matrix[3], *p ) );
-	out[1] = _mm_unpacklo_ps(out[1], tmp);
+	//compute term Z and term W and store them in the low order of out
+	out = Hadd4( _mm_mul_ps( matrix[2], p ) ); //compute Z
+	tmp = Hadd4( _mm_mul_ps( matrix[3], p ) ); //compute W
+	out = _mm_unpacklo_ps(out, tmp);
 
-	//shuffle the low order of outxmm[0] into the loworder of tmp
-	//and shuffle the low order of outxmm[1] into the high order of tmp
-	*out = _mm_movelh_ps(out[0], out[1]);
+	//shuffle the low order of XY into the loworder of out
+	//and shuffle the low order of out into the high order of out
+	out = _mm_movelh_ps(XY, out);
 }
 
 void ZeroFloatRow(FloatRow& r)

Modified: Mercury2/src/MercuryMath.h
===================================================================
--- Mercury2/src/MercuryMath.h	2009-03-02 21:59:02 UTC (rev 167)
+++ Mercury2/src/MercuryMath.h	2009-03-03 02:02:34 UTC (rev 168)
@@ -52,7 +52,7 @@
 void Copy8f( void * dest, const void * source );
 void Copy16f( void * dest, const void * source );
 void MatrixMultiply4f ( const FloatRow* in1, const FloatRow* in2, FloatRow* out );
-void VectorMultiply4f(const FloatRow* matrix, const FloatRow* p, FloatRow* out );
+void VectorMultiply4f(const FloatRow* matrix, const FloatRow& p, FloatRow& out );
 void TransposeMatrix( FloatRow* m );
 
 void Float2FloatRow(const float* f, FloatRow* r);

Modified: Mercury2/src/MercuryMatrix.cpp
===================================================================
--- Mercury2/src/MercuryMatrix.cpp	2009-03-02 21:59:02 UTC (rev 167)
+++ Mercury2/src/MercuryMatrix.cpp	2009-03-03 02:02:34 UTC (rev 168)
@@ -203,9 +203,9 @@
 	v.ConvertToVector4( tmp );
 	tmp[3] = 1;
 	Float2FloatRow( tmp, &r );
-	VectorMultiply4f( m_matrix, &r, &tvo);
+	VectorMultiply4f( m_matrix, r, tvo);
 	FloatRow2Float( &tvo, tmp );
-
+	printf("%f %f %f %f\n", tmp[0], tmp[1], tmp[2], tmp[3]);
 	return MercuryVertex(tmp);
 }
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|