|
From: <sv...@va...> - 2011-10-19 15:28:51
|
Author: sewardj
Date: 2011-10-19 16:24:01 +0100 (Wed, 19 Oct 2011)
New Revision: 2218
Log:
Implement the SSE4.1 insn PCMPEQQ. n-i-bz. (VEX side changes)
** MERGE TO AVX **
Modified:
trunk/priv/guest_amd64_toIR.c
trunk/priv/host_amd64_isel.c
trunk/priv/host_generic_simd128.c
trunk/priv/host_generic_simd128.h
trunk/priv/ir_defs.c
trunk/pub/libvex_ir.h
Modified: trunk/priv/guest_amd64_toIR.c
===================================================================
--- trunk/priv/guest_amd64_toIR.c 2011-10-19 14:50:27 UTC (rev 2217)
+++ trunk/priv/guest_amd64_toIR.c 2011-10-19 15:24:01 UTC (rev 2218)
@@ -16135,6 +16135,17 @@
goto decode_success;
}
+ /* 66 0F 38 29 = PCMPEQQ
+ 64x2 equality comparison
+ */
+ if ( have66noF2noF3( pfx ) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x29) {
+ /* FIXME: this needs an alignment check */
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+3,
+ "pcmpeqq", Iop_CmpEQ64x2, False );
+ goto decode_success;
+ }
+
/* ---------------------------------------------------- */
/* --- end of the SSE4 decoder --- */
/* ---------------------------------------------------- */
Modified: trunk/priv/host_amd64_isel.c
===================================================================
--- trunk/priv/host_amd64_isel.c 2011-10-19 14:50:27 UTC (rev 2217)
+++ trunk/priv/host_amd64_isel.c 2011-10-19 15:24:01 UTC (rev 2218)
@@ -3658,6 +3658,8 @@
goto do_SseAssistedBinary;
case Iop_Min8Sx16: fn = (HWord)h_generic_calc_Min8Sx16;
goto do_SseAssistedBinary;
+ case Iop_CmpEQ64x2: fn = (HWord)h_generic_calc_CmpEQ64x2;
+ goto do_SseAssistedBinary;
case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
goto do_SseAssistedBinary;
case Iop_QNarrowBin32Sto16Ux8:
Modified: trunk/priv/host_generic_simd128.c
===================================================================
--- trunk/priv/host_generic_simd128.c 2011-10-19 14:50:27 UTC (rev 2217)
+++ trunk/priv/host_generic_simd128.c 2011-10-19 15:24:01 UTC (rev 2218)
@@ -88,6 +88,12 @@
return toUChar((xx < yy) ? xx : yy);
}
+static inline ULong cmpEQ64 ( Long xx, Long yy ) /* Single 64-bit lane equality test, result is a full-width mask. */
+{
+ return (((Long)xx) == ((Long)yy)) /* casts are no-ops here: both parameters are already declared Long */
+ ? 0xFFFFFFFFFFFFFFFFULL : 0ULL; /* all 64 bits set when equal, all clear otherwise */
+}
+
static inline ULong cmpGT64S ( Long xx, Long yy )
{
return (((Long)xx) > ((Long)yy))
@@ -225,6 +231,13 @@
res->w8[15] = min8S(argL->w8[15], argR->w8[15]);
}
+void h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
+ V128* argL, V128* argR ) /* res receives the result; argL and argR are only read */
+{ /* Lane-wise equality over the two w64 lanes: each result lane is the cmpEQ64 mask of the matching input lanes. */
+ res->w64[0] = cmpEQ64(argL->w64[0], argR->w64[0]); /* lane w64[0] */
+ res->w64[1] = cmpEQ64(argL->w64[1], argR->w64[1]); /* lane w64[1] */
+}
+
void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
Modified: trunk/priv/host_generic_simd128.h
===================================================================
--- trunk/priv/host_generic_simd128.h 2011-10-19 14:50:27 UTC (rev 2217)
+++ trunk/priv/host_generic_simd128.h 2011-10-19 15:24:01 UTC (rev 2218)
@@ -57,6 +57,7 @@
extern void h_generic_calc_Min16Ux8 ( /*OUT*/V128*, V128*, V128* );
extern void h_generic_calc_Max8Sx16 ( /*OUT*/V128*, V128*, V128* );
extern void h_generic_calc_Min8Sx16 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_CmpEQ64x2 ( /*OUT*/V128*, V128*, V128* );
extern void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
extern void h_generic_calc_SarN64x2 ( /*OUT*/V128*, V128*, UInt );
extern void h_generic_calc_SarN8x16 ( /*OUT*/V128*, V128*, UInt );
Modified: trunk/priv/ir_defs.c
===================================================================
--- trunk/priv/ir_defs.c 2011-10-19 14:50:27 UTC (rev 2217)
+++ trunk/priv/ir_defs.c 2011-10-19 15:24:01 UTC (rev 2218)
@@ -777,6 +777,7 @@
case Iop_CmpEQ8x16: vex_printf("CmpEQ8x16"); return;
case Iop_CmpEQ16x8: vex_printf("CmpEQ16x8"); return;
case Iop_CmpEQ32x4: vex_printf("CmpEQ32x4"); return;
+ case Iop_CmpEQ64x2: vex_printf("CmpEQ64x2"); return;
case Iop_CmpGT8Sx16: vex_printf("CmpGT8Sx16"); return;
case Iop_CmpGT16Sx8: vex_printf("CmpGT16Sx8"); return;
case Iop_CmpGT32Sx4: vex_printf("CmpGT32Sx4"); return;
@@ -2434,6 +2435,7 @@
case Iop_Min8Sx16: case Iop_Min16Sx8: case Iop_Min32Sx4:
case Iop_Min8Ux16: case Iop_Min16Ux8: case Iop_Min32Ux4:
case Iop_CmpEQ8x16: case Iop_CmpEQ16x8: case Iop_CmpEQ32x4:
+ case Iop_CmpEQ64x2:
case Iop_CmpGT8Sx16: case Iop_CmpGT16Sx8: case Iop_CmpGT32Sx4:
case Iop_CmpGT64Sx2:
case Iop_CmpGT8Ux16: case Iop_CmpGT16Ux8: case Iop_CmpGT32Ux4:
Modified: trunk/pub/libvex_ir.h
===================================================================
--- trunk/pub/libvex_ir.h 2011-10-19 14:50:27 UTC (rev 2217)
+++ trunk/pub/libvex_ir.h 2011-10-19 15:24:01 UTC (rev 2218)
@@ -1170,7 +1170,7 @@
Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,
/* COMPARISON */
- Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4,
+ Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2,
Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,
|