|
From: <sv...@va...> - 2010-06-18 08:18:47
|
Author: sewardj
Date: 2010-06-18 09:18:38 +0100 (Fri, 18 Jun 2010)
New Revision: 11181
Log:
Valgrind-side changes needed to go with vex r1984 (Implement SSE4
insns: CMPGTQ PMAXUD PMINUD PMAXSB PMINSB PMULLD)
Modified:
trunk/Makefile.vex.am
trunk/memcheck/mc_translate.c
trunk/none/tests/amd64/sse4-64.c
Modified: trunk/Makefile.vex.am
===================================================================
--- trunk/Makefile.vex.am 2010-06-15 14:55:28 UTC (rev 11180)
+++ trunk/Makefile.vex.am 2010-06-18 08:18:38 UTC (rev 11181)
@@ -40,6 +40,7 @@
priv/guest_arm_defs.h \
priv/host_generic_regs.h \
priv/host_generic_simd64.h \
+ priv/host_generic_simd128.h \
priv/host_x86_defs.h \
priv/host_amd64_defs.h \
priv/host_ppc_defs.h \
@@ -90,6 +91,7 @@
priv/guest_arm_toIR.c \
priv/host_generic_regs.c \
priv/host_generic_simd64.c \
+ priv/host_generic_simd128.c \
priv/host_generic_reg_alloc2.c \
priv/host_x86_defs.c \
priv/host_x86_isel.c \
Modified: trunk/memcheck/mc_translate.c
===================================================================
--- trunk/memcheck/mc_translate.c 2010-06-15 14:55:28 UTC (rev 11180)
+++ trunk/memcheck/mc_translate.c 2010-06-18 08:18:38 UTC (rev 11181)
@@ -2279,10 +2279,12 @@
case Iop_Max32Sx4:
case Iop_Min32Ux4:
case Iop_Min32Sx4:
+ case Iop_Mul32x4:
return binary32Ix4(mce, vatom1, vatom2);
case Iop_Sub64x2:
case Iop_Add64x2:
+ case Iop_CmpGT64Sx2:
return binary64Ix2(mce, vatom1, vatom2);
case Iop_QNarrow32Sx4:
Modified: trunk/none/tests/amd64/sse4-64.c
===================================================================
--- trunk/none/tests/amd64/sse4-64.c 2010-06-15 14:55:28 UTC (rev 11180)
+++ trunk/none/tests/amd64/sse4-64.c 2010-06-18 08:18:38 UTC (rev 11181)
@@ -58,6 +58,14 @@
}
RMArgs;
+static void do64HLtoV128 ( /*OUT*/V128* res, ULong wHi, ULong wLo )
+{
+ // try to sidestep strict-aliasing snafus by memcpying explicitly
+ UChar* p = (UChar*)res;
+ memcpy(&p[8], (UChar*)&wHi, 8);
+ memcpy(&p[0], (UChar*)&wLo, 8);
+}
+
static UChar randUChar ( void )
{
static UInt seed = 80021;
@@ -2059,12 +2067,40 @@
}
+void test_PCMPGTQ ( void )
+{
+ V128 spec[7];
+ do64HLtoV128( &spec[0], 0x0000000000000000ULL, 0xffffffffffffffffULL );
+ do64HLtoV128( &spec[1], 0x0000000000000001ULL, 0xfffffffffffffffeULL );
+ do64HLtoV128( &spec[2], 0x7fffffffffffffffULL, 0x8000000000000001ULL );
+ do64HLtoV128( &spec[3], 0x8000000000000000ULL, 0x8000000000000000ULL );
+ do64HLtoV128( &spec[4], 0x8000000000000001ULL, 0x7fffffffffffffffULL );
+ do64HLtoV128( &spec[5], 0xfffffffffffffffeULL, 0x0000000000000001ULL );
+ do64HLtoV128( &spec[6], 0xffffffffffffffffULL, 0x0000000000000000ULL );
+ V128 src, dst;
+ Int i, j;
+ for (i = 0; i < 10; i++) {
+ randV128(&src);
+ randV128(&dst);
+ DO_mandr_r("pcmpgtq", src, dst);
+ }
+ for (i = 0; i < 7; i++) {
+ for (j = 0; j < 7; j++) {
+ memcpy(&src, &spec[i], 16);
+ memcpy(&dst, &spec[j], 16);
+ DO_mandr_r("pcmpgtq", src, dst);
+ }
+ }
+}
+
+
int main ( int argc, char** argv )
{
#if 1
+ // ------ SSE 4.1 ------
test_BLENDPD(); // done Apr.01.2010
test_BLENDPS(); // done Apr.02.2010
//test_PBLENDW();
@@ -2088,14 +2124,14 @@
//test_PINSRW(); // todo
//test_PINSRB(); // todo
//test_PHMINPOSUW();
- //test_PMAXSB();
+ test_PMAXSB();
test_PMAXSD(); // done Apr.09.2010
test_PMAXUD(); // done Apr.16.2010
- //test_PMAXUW();
- //test_PMINSB();
+ test_PMAXUW();
+ test_PMINSB();
test_PMINSD(); // done Apr.09.2010
test_PMINUD();
- //test_PMINUW();
+ test_PMINUW();
test_PMOVSXBW(); // done Apr.02.2010
test_PMOVSXBD(); // done Mar.30.2010
test_PMOVSXBQ(); // done Mar.30.2010
@@ -2112,13 +2148,16 @@
test_POPCNTL();
test_POPCNTQ();
//test_PMULDQ();
- //test_PMULLD();
+ test_PMULLD();
// PTEST
// ROUNDPD
// ROUNDPS
// ROUNDSD
// ROUNDSS
+ // ------ SSE 4.2 ------
+ test_PCMPGTQ();
#else
+ test_PMAXSB();
#endif
return 0;
|