|
From: <sv...@va...> - 2011-06-15 16:09:58
|
Author: sewardj
Date: 2011-06-15 17:05:07 +0100 (Wed, 15 Jun 2011)
New Revision: 2160
Log:
Implement PACKUSDW (SSE4.1). Fixes #274776.
Modified:
trunk/priv/guest_amd64_toIR.c
trunk/priv/host_amd64_isel.c
trunk/priv/host_generic_simd128.c
trunk/priv/host_generic_simd128.h
trunk/priv/ir_defs.c
trunk/pub/libvex_ir.h
Modified: trunk/priv/guest_amd64_toIR.c
===================================================================
--- trunk/priv/guest_amd64_toIR.c 2011-06-15 15:09:37 UTC (rev 2159)
+++ trunk/priv/guest_amd64_toIR.c 2011-06-15 16:05:07 UTC (rev 2160)
@@ -15999,6 +15999,40 @@
goto decode_success;
}
+ /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
+ 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x2B ) {
+
+ modrm = insn[3];
+
+ IRTemp argL = newTemp(Ity_V128);
+ IRTemp argR = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "packusdw %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
+ delta += 3+alen;
+ DIP( "packusdw %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_QNarrow32Sto16Ux8, mkexpr(argL), mkexpr(argR)) );
+
+ goto decode_success;
+ }
+
/* ---------------------------------------------------- */
/* --- end of the SSE4 decoder --- */
/* ---------------------------------------------------- */
Modified: trunk/priv/host_amd64_isel.c
===================================================================
--- trunk/priv/host_amd64_isel.c 2011-06-15 15:09:37 UTC (rev 2159)
+++ trunk/priv/host_amd64_isel.c 2011-06-15 16:05:07 UTC (rev 2160)
@@ -3660,6 +3660,9 @@
goto do_SseAssistedBinary;
case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
goto do_SseAssistedBinary;
+ case Iop_QNarrow32Sto16Ux8:
+ fn = (HWord)h_generic_calc_QNarrow32Sto16Ux8;
+ goto do_SseAssistedBinary;
do_SseAssistedBinary: {
/* RRRufff! RRRufff code is what we're generating here. Oh
well. */
Modified: trunk/priv/host_generic_simd128.c
===================================================================
--- trunk/priv/host_generic_simd128.c 2011-06-15 15:09:37 UTC (rev 2159)
+++ trunk/priv/host_generic_simd128.c 2011-06-15 16:05:07 UTC (rev 2160)
@@ -104,6 +104,14 @@
return toUChar(((Char)v) >> n);
}
+static inline UShort qnarrow32Sto16U ( UInt xx0 ) /* Saturating narrow: treat xx0 as signed and clamp it to 0..65535. */
+{
+ Int xx = (Int)xx0; /* reinterpret the incoming bits as a signed value */
+ if (xx < 0) xx = 0; /* negative inputs saturate to 0 */
+ if (xx > 65535) xx = 65535; /* inputs above 65535 saturate to 65535 */
+ return (UShort)xx; /* xx is within 0..65535 here, so the cast to UShort is not expected to lose information */
+}
+
void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
@@ -263,6 +271,20 @@
res->w8[15] = sar8(argL->w8[15], nn);
}
+void h_generic_calc_QNarrow32Sto16Ux8 ( /*OUT*/V128* res, /* Narrows the four w32 lanes of each argument, with signed-to-unsigned saturation, into the eight w16 lanes of *res. */
+ V128* argL, V128* argR ) /* argR supplies res->w16[0..3]; argL supplies res->w16[4..7]. */
+{ /* NOTE(review): res->w16[0..3] are stored before any argL->w32[] lane is read; presumably this gives wrong results if res aliases argL and w16/w32 share storage -- confirm callers always pass distinct buffers. */
+ res->w16[0] = qnarrow32Sto16U(argR->w32[0]); /* lanes 0..3 of the result come from argR */
+ res->w16[1] = qnarrow32Sto16U(argR->w32[1]);
+ res->w16[2] = qnarrow32Sto16U(argR->w32[2]);
+ res->w16[3] = qnarrow32Sto16U(argR->w32[3]);
+ res->w16[4] = qnarrow32Sto16U(argL->w32[0]); /* lanes 4..7 of the result come from argL */
+ res->w16[5] = qnarrow32Sto16U(argL->w32[1]);
+ res->w16[6] = qnarrow32Sto16U(argL->w32[2]);
+ res->w16[7] = qnarrow32Sto16U(argL->w32[3]);
+}
+
+
/*---------------------------------------------------------------*/
/*--- end host_generic_simd128.c ---*/
/*---------------------------------------------------------------*/
Modified: trunk/priv/host_generic_simd128.h
===================================================================
--- trunk/priv/host_generic_simd128.h 2011-06-15 15:09:37 UTC (rev 2159)
+++ trunk/priv/host_generic_simd128.h 2011-06-15 16:05:07 UTC (rev 2160)
@@ -61,7 +61,10 @@
extern void h_generic_calc_SarN64x2 ( /*OUT*/V128*, V128*, UInt );
extern void h_generic_calc_SarN8x16 ( /*OUT*/V128*, V128*, UInt );
+extern void h_generic_calc_QNarrow32Sto16Ux8
+ ( /*OUT*/V128*, V128*, V128* );
+
#endif /* ndef __VEX_HOST_GENERIC_SIMD128_H */
/*---------------------------------------------------------------*/
Modified: trunk/priv/ir_defs.c
===================================================================
--- trunk/priv/ir_defs.c 2011-06-15 15:09:37 UTC (rev 2159)
+++ trunk/priv/ir_defs.c 2011-06-15 16:05:07 UTC (rev 2160)
@@ -846,6 +846,7 @@
case Iop_Narrow16x8: vex_printf("Narrow16x8"); return;
case Iop_Narrow32x4: vex_printf("Narrow32x4"); return;
+ case Iop_QNarrow32Sto16Ux8: vex_printf("QNarrow32Sto16Ux8"); return;
case Iop_QNarrow16Sto8Ux16: vex_printf("QNarrow16Sto8Ux16"); return;
case Iop_QNarrow32Uto16Ux8: vex_printf("QNarrow32Uto16Ux8"); return;
case Iop_QNarrow16Sto8Sx16: vex_printf("QNarrow16Sto8Sx16"); return;
@@ -2418,7 +2419,7 @@
case Iop_Sar8x16: case Iop_Sar16x8: case Iop_Sar32x4: case Iop_Sar64x2:
case Iop_Sal8x16: case Iop_Sal16x8: case Iop_Sal32x4: case Iop_Sal64x2:
case Iop_Rol8x16: case Iop_Rol16x8: case Iop_Rol32x4:
- case Iop_QNarrow16Sto8Ux16:
+ case Iop_QNarrow16Sto8Ux16: case Iop_QNarrow32Sto16Ux8:
case Iop_QNarrow16Sto8Sx16: case Iop_QNarrow32Sto16Sx8:
case Iop_QNarrow32Uto16Ux8:
case Iop_Narrow16x8: case Iop_Narrow32x4:
Modified: trunk/pub/libvex_ir.h
===================================================================
--- trunk/pub/libvex_ir.h 2011-06-15 15:09:37 UTC (rev 2159)
+++ trunk/pub/libvex_ir.h 2011-06-15 16:05:07 UTC (rev 2160)
@@ -1195,7 +1195,7 @@
/* NARROWING -- narrow 2xV128 into 1xV128, hi half from left arg */
/* See comments above w.r.t. U vs S issues in saturated narrowing. */
- Iop_QNarrow16Sto8Ux16,
+ Iop_QNarrow16Sto8Ux16, Iop_QNarrow32Sto16Ux8,
Iop_QNarrow16Sto8Sx16, Iop_QNarrow32Sto16Sx8,
Iop_QNarrow16Uto8Ux16, Iop_QNarrow32Uto16Ux8,
Iop_Narrow16x8, Iop_Narrow32x4,
|