|
From: <sv...@va...> - 2011-06-15 15:14:28
|
Author: sewardj
Date: 2011-06-15 16:09:37 +0100 (Wed, 15 Jun 2011)
New Revision: 2159
Log:
Partially fix underspecification of saturating narrowing primops that
became apparent whilst looking into the problem of implementing the
SSE4 packusdw instruction. Probably breaks Altivec.
Modified:
trunk/priv/guest_amd64_toIR.c
trunk/priv/guest_ppc_toIR.c
trunk/priv/guest_x86_toIR.c
trunk/priv/host_amd64_isel.c
trunk/priv/host_generic_simd64.c
trunk/priv/host_generic_simd64.h
trunk/priv/host_ppc_isel.c
trunk/priv/host_x86_isel.c
trunk/priv/ir_defs.c
trunk/pub/libvex_ir.h
trunk/test_main.c
Modified: trunk/priv/guest_amd64_toIR.c
===================================================================
--- trunk/priv/guest_amd64_toIR.c 2011-06-07 21:28:38 UTC (rev 2158)
+++ trunk/priv/guest_amd64_toIR.c 2011-06-15 15:09:37 UTC (rev 2159)
@@ -6504,9 +6504,9 @@
case 0x65: op = Iop_CmpGT16Sx4; break;
case 0x66: op = Iop_CmpGT32Sx2; break;
- case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break;
- case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break;
- case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break;
+ case 0x6B: op = Iop_QNarrow32Sto16Sx4; eLeft = True; break;
+ case 0x63: op = Iop_QNarrow16Sto8Sx8; eLeft = True; break;
+ case 0x67: op = Iop_QNarrow16Sto8Ux8; eLeft = True; break;
case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
@@ -11786,7 +11786,8 @@
if (have66noF2noF3(pfx) && sz == 2
&& insn[0] == 0x0F && insn[1] == 0x6B) {
delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
- "packssdw", Iop_QNarrow32Sx4, True );
+ "packssdw",
+ Iop_QNarrow32Sto16Sx8, True );
goto decode_success;
}
@@ -11794,7 +11795,8 @@
if (have66noF2noF3(pfx) && sz == 2
&& insn[0] == 0x0F && insn[1] == 0x63) {
delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
- "packsswb", Iop_QNarrow16Sx8, True );
+ "packsswb",
+ Iop_QNarrow16Sto8Sx16, True );
goto decode_success;
}
@@ -11802,7 +11804,8 @@
if (have66noF2noF3(pfx) && sz == 2
&& insn[0] == 0x0F && insn[1] == 0x67) {
delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
- "packuswb", Iop_QNarrow16Ux8, True );
+ "packuswb",
+ Iop_QNarrow16Sto8Ux16, True );
goto decode_success;
}
Modified: trunk/priv/guest_ppc_toIR.c
===================================================================
--- trunk/priv/guest_ppc_toIR.c 2011-06-07 21:28:38 UTC (rev 2158)
+++ trunk/priv/guest_ppc_toIR.c 2011-06-15 15:09:37 UTC (rev 2159)
@@ -9418,7 +9418,7 @@
mkU8(15))) );
putVReg( vD_addr,
- binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) );
+ binop(Iop_QNarrow32Sto16Sx8, mkexpr(zHi), mkexpr(zLo)) );
break;
}
case 0x21: { // vmhraddshs (Mult High Round, Add Signed HW Saturate, AV p186)
@@ -9452,7 +9452,8 @@
mkexpr(aHi), mkexpr(bHi))),
mkU8(15))) );
- putVReg( vD_addr, binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) );
+ putVReg( vD_addr,
+ binop(Iop_QNarrow32Sto16Sx8, mkexpr(zHi), mkexpr(zLo)) );
break;
}
case 0x22: { // vmladduhm (Mult Low, Add Unsigned HW Modulo, AV p194)
@@ -9965,14 +9966,14 @@
case 0x08E: // vpkuhus (Pack Unsigned HW Unsigned Saturate, AV p225)
DIP("vpkuhus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
putVReg( vD_addr,
- binop(Iop_QNarrow16Ux8, mkexpr(vA), mkexpr(vB)) );
+ binop(Iop_QNarrow16Uto8Ux16, mkexpr(vA), mkexpr(vB)) );
// TODO: set VSCR[SAT]
return True;
case 0x0CE: // vpkuwus (Pack Unsigned W Unsigned Saturate, AV p227)
DIP("vpkuwus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
putVReg( vD_addr,
- binop(Iop_QNarrow32Ux4, mkexpr(vA), mkexpr(vB)) );
+ binop(Iop_QNarrow32Uto16Ux8, mkexpr(vA), mkexpr(vB)) );
// TODO: set VSCR[SAT]
return True;
@@ -9991,7 +9992,7 @@
unop(Iop_NotV128,
binop(Iop_SarN16x8,
mkexpr(vB), mkU8(15)))) );
- putVReg( vD_addr, binop(Iop_QNarrow16Ux8,
+ putVReg( vD_addr, binop(Iop_QNarrow16Uto8Ux16,
mkexpr(vA_tmp), mkexpr(vB_tmp)) );
// TODO: set VSCR[SAT]
return True;
@@ -10011,7 +10012,7 @@
unop(Iop_NotV128,
binop(Iop_SarN32x4,
mkexpr(vB), mkU8(31)))) );
- putVReg( vD_addr, binop(Iop_QNarrow32Ux4,
+ putVReg( vD_addr, binop(Iop_QNarrow32Uto16Ux8,
mkexpr(vA_tmp), mkexpr(vB_tmp)) );
// TODO: set VSCR[SAT]
return True;
@@ -10019,14 +10020,14 @@
case 0x18E: // vpkshss (Pack Signed HW Signed Saturate, AV p220)
DIP("vpkshss v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
putVReg( vD_addr,
- binop(Iop_QNarrow16Sx8, mkexpr(vA), mkexpr(vB)) );
+ binop(Iop_QNarrow16Sto8Sx16, mkexpr(vA), mkexpr(vB)) );
// TODO: set VSCR[SAT]
return True;
case 0x1CE: // vpkswss (Pack Signed W Signed Saturate, AV p222)
DIP("vpkswss v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
putVReg( vD_addr,
- binop(Iop_QNarrow32Sx4, mkexpr(vA), mkexpr(vB)) );
+ binop(Iop_QNarrow32Sto16Sx8, mkexpr(vA), mkexpr(vB)) );
// TODO: set VSCR[SAT]
return True;
Modified: trunk/priv/guest_x86_toIR.c
===================================================================
--- trunk/priv/guest_x86_toIR.c 2011-06-07 21:28:38 UTC (rev 2158)
+++ trunk/priv/guest_x86_toIR.c 2011-06-15 15:09:37 UTC (rev 2159)
@@ -5475,9 +5475,9 @@
case 0x65: op = Iop_CmpGT16Sx4; break;
case 0x66: op = Iop_CmpGT32Sx2; break;
- case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break;
- case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break;
- case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break;
+ case 0x6B: op = Iop_QNarrow32Sto16Sx4; eLeft = True; break;
+ case 0x63: op = Iop_QNarrow16Sto8Sx8; eLeft = True; break;
+ case 0x67: op = Iop_QNarrow16Sto8Ux8; eLeft = True; break;
case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
@@ -10532,21 +10532,24 @@
/* 66 0F 6B = PACKSSDW */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) {
delta = dis_SSEint_E_to_G( sorb, delta+2,
- "packssdw", Iop_QNarrow32Sx4, True );
+ "packssdw",
+ Iop_QNarrow32Sto16Sx8, True );
goto decode_success;
}
/* 66 0F 63 = PACKSSWB */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) {
delta = dis_SSEint_E_to_G( sorb, delta+2,
- "packsswb", Iop_QNarrow16Sx8, True );
+ "packsswb",
+ Iop_QNarrow16Sto8Sx16, True );
goto decode_success;
}
/* 66 0F 67 = PACKUSWB */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) {
delta = dis_SSEint_E_to_G( sorb, delta+2,
- "packuswb", Iop_QNarrow16Ux8, True );
+ "packuswb",
+ Iop_QNarrow16Sto8Ux16, True );
goto decode_success;
}
Modified: trunk/priv/host_amd64_isel.c
===================================================================
--- trunk/priv/host_amd64_isel.c 2011-06-07 21:28:38 UTC (rev 2158)
+++ trunk/priv/host_amd64_isel.c 2011-06-15 15:09:37 UTC (rev 2159)
@@ -1094,12 +1094,12 @@
case Iop_QAdd16Ux4:
fn = (HWord)h_generic_calc_QAdd16Ux4; break;
- case Iop_QNarrow32Sx2:
- fn = (HWord)h_generic_calc_QNarrow32Sx2; break;
- case Iop_QNarrow16Sx4:
- fn = (HWord)h_generic_calc_QNarrow16Sx4; break;
- case Iop_QNarrow16Ux4:
- fn = (HWord)h_generic_calc_QNarrow16Ux4; break;
+ case Iop_QNarrow32Sto16Sx4:
+ fn = (HWord)h_generic_calc_QNarrow32Sto16Sx4; break;
+ case Iop_QNarrow16Sto8Sx8:
+ fn = (HWord)h_generic_calc_QNarrow16Sto8Sx8; break;
+ case Iop_QNarrow16Sto8Ux8:
+ fn = (HWord)h_generic_calc_QNarrow16Sto8Ux8; break;
case Iop_QSub8Sx8:
fn = (HWord)h_generic_calc_QSub8Sx8; break;
@@ -3544,11 +3544,11 @@
return dst;
}
- case Iop_QNarrow32Sx4:
+ case Iop_QNarrow32Sto16Sx8:
op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
- case Iop_QNarrow16Sx8:
+ case Iop_QNarrow16Sto8Sx16:
op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
- case Iop_QNarrow16Ux8:
+ case Iop_QNarrow16Sto8Ux16:
op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
case Iop_InterleaveHI8x16:
Modified: trunk/priv/host_generic_simd64.c
===================================================================
--- trunk/priv/host_generic_simd64.c 2011-06-07 21:28:38 UTC (rev 2158)
+++ trunk/priv/host_generic_simd64.c 2011-06-15 15:09:37 UTC (rev 2159)
@@ -272,7 +272,7 @@
return toUChar(xx==0 ? 0 : 0xFF);
}
-static inline Short qnarrow32Sto16 ( UInt xx0 )
+static inline Short qnarrow32Sto16S ( UInt xx0 )
{
Int xx = (Int)xx0;
if (xx < -32768) xx = -32768;
@@ -280,7 +280,7 @@
return (Short)xx;
}
-static inline Char qnarrow16Sto8 ( UShort xx0 )
+static inline Char qnarrow16Sto8S ( UShort xx0 )
{
Short xx = (Short)xx0;
if (xx < -128) xx = -128;
@@ -288,7 +288,7 @@
return (Char)xx;
}
-static inline UChar qnarrow16Uto8 ( UShort xx0 )
+static inline UChar qnarrow16Sto8U ( UShort xx0 )
{
Short xx = (Short)xx0;
if (xx < 0) xx = 0;
@@ -759,21 +759,21 @@
/* ------------ Saturating narrowing ------------ */
-ULong h_generic_calc_QNarrow32Sx2 ( ULong aa, ULong bb )
+ULong h_generic_calc_QNarrow32Sto16Sx4 ( ULong aa, ULong bb )
{
UInt d = sel32x2_1(aa);
UInt c = sel32x2_0(aa);
UInt b = sel32x2_1(bb);
UInt a = sel32x2_0(bb);
return mk16x4(
- qnarrow32Sto16(d),
- qnarrow32Sto16(c),
- qnarrow32Sto16(b),
- qnarrow32Sto16(a)
+ qnarrow32Sto16S(d),
+ qnarrow32Sto16S(c),
+ qnarrow32Sto16S(b),
+ qnarrow32Sto16S(a)
);
}
-ULong h_generic_calc_QNarrow16Sx4 ( ULong aa, ULong bb )
+ULong h_generic_calc_QNarrow16Sto8Sx8 ( ULong aa, ULong bb )
{
UShort h = sel16x4_3(aa);
UShort g = sel16x4_2(aa);
@@ -784,18 +784,18 @@
UShort b = sel16x4_1(bb);
UShort a = sel16x4_0(bb);
return mk8x8(
- qnarrow16Sto8(h),
- qnarrow16Sto8(g),
- qnarrow16Sto8(f),
- qnarrow16Sto8(e),
- qnarrow16Sto8(d),
- qnarrow16Sto8(c),
- qnarrow16Sto8(b),
- qnarrow16Sto8(a)
+ qnarrow16Sto8S(h),
+ qnarrow16Sto8S(g),
+ qnarrow16Sto8S(f),
+ qnarrow16Sto8S(e),
+ qnarrow16Sto8S(d),
+ qnarrow16Sto8S(c),
+ qnarrow16Sto8S(b),
+ qnarrow16Sto8S(a)
);
}
-ULong h_generic_calc_QNarrow16Ux4 ( ULong aa, ULong bb )
+ULong h_generic_calc_QNarrow16Sto8Ux8 ( ULong aa, ULong bb )
{
UShort h = sel16x4_3(aa);
UShort g = sel16x4_2(aa);
@@ -806,14 +806,14 @@
UShort b = sel16x4_1(bb);
UShort a = sel16x4_0(bb);
return mk8x8(
- qnarrow16Uto8(h),
- qnarrow16Uto8(g),
- qnarrow16Uto8(f),
- qnarrow16Uto8(e),
- qnarrow16Uto8(d),
- qnarrow16Uto8(c),
- qnarrow16Uto8(b),
- qnarrow16Uto8(a)
+ qnarrow16Sto8U(h),
+ qnarrow16Sto8U(g),
+ qnarrow16Sto8U(f),
+ qnarrow16Sto8U(e),
+ qnarrow16Sto8U(d),
+ qnarrow16Sto8U(c),
+ qnarrow16Sto8U(b),
+ qnarrow16Sto8U(a)
);
}
Modified: trunk/priv/host_generic_simd64.h
===================================================================
--- trunk/priv/host_generic_simd64.h 2011-06-07 21:28:38 UTC (rev 2158)
+++ trunk/priv/host_generic_simd64.h 2011-06-15 15:09:37 UTC (rev 2159)
@@ -87,9 +87,9 @@
extern ULong h_generic_calc_CmpNEZ16x4 ( ULong );
extern ULong h_generic_calc_CmpNEZ8x8 ( ULong );
-extern ULong h_generic_calc_QNarrow32Sx2 ( ULong, ULong );
-extern ULong h_generic_calc_QNarrow16Sx4 ( ULong, ULong );
-extern ULong h_generic_calc_QNarrow16Ux4 ( ULong, ULong );
+extern ULong h_generic_calc_QNarrow32Sto16Sx4 ( ULong, ULong );
+extern ULong h_generic_calc_QNarrow16Sto8Sx8 ( ULong, ULong );
+extern ULong h_generic_calc_QNarrow16Sto8Ux8 ( ULong, ULong );
extern ULong h_generic_calc_InterleaveHI8x8 ( ULong, ULong );
extern ULong h_generic_calc_InterleaveLO8x8 ( ULong, ULong );
Modified: trunk/priv/host_ppc_isel.c
===================================================================
--- trunk/priv/host_ppc_isel.c 2011-06-07 21:28:38 UTC (rev 2158)
+++ trunk/priv/host_ppc_isel.c 2011-06-15 15:09:37 UTC (rev 2159)
@@ -3678,11 +3678,11 @@
case Iop_Shr16x8: op = Pav_SHR; goto do_AvBin16x8;
case Iop_Sar16x8: op = Pav_SAR; goto do_AvBin16x8;
case Iop_Rol16x8: op = Pav_ROTL; goto do_AvBin16x8;
- case Iop_Narrow16x8: op = Pav_PACKUU; goto do_AvBin16x8;
- case Iop_QNarrow16Ux8: op = Pav_QPACKUU; goto do_AvBin16x8;
- case Iop_QNarrow16Sx8: op = Pav_QPACKSS; goto do_AvBin16x8;
- case Iop_InterleaveHI16x8: op = Pav_MRGHI; goto do_AvBin16x8;
- case Iop_InterleaveLO16x8: op = Pav_MRGLO; goto do_AvBin16x8;
+ case Iop_Narrow16x8: op = Pav_PACKUU; goto do_AvBin16x8;
+ case Iop_QNarrow16Uto8Ux16: op = Pav_QPACKUU; goto do_AvBin16x8;
+ case Iop_QNarrow16Sto8Sx16: op = Pav_QPACKSS; goto do_AvBin16x8;
+ case Iop_InterleaveHI16x8: op = Pav_MRGHI; goto do_AvBin16x8;
+ case Iop_InterleaveLO16x8: op = Pav_MRGLO; goto do_AvBin16x8;
case Iop_Add16x8: op = Pav_ADDU; goto do_AvBin16x8;
case Iop_QAdd16Ux8: op = Pav_QADDU; goto do_AvBin16x8;
case Iop_QAdd16Sx8: op = Pav_QADDS; goto do_AvBin16x8;
@@ -3712,11 +3712,11 @@
case Iop_Shr32x4: op = Pav_SHR; goto do_AvBin32x4;
case Iop_Sar32x4: op = Pav_SAR; goto do_AvBin32x4;
case Iop_Rol32x4: op = Pav_ROTL; goto do_AvBin32x4;
- case Iop_Narrow32x4: op = Pav_PACKUU; goto do_AvBin32x4;
- case Iop_QNarrow32Ux4: op = Pav_QPACKUU; goto do_AvBin32x4;
- case Iop_QNarrow32Sx4: op = Pav_QPACKSS; goto do_AvBin32x4;
- case Iop_InterleaveHI32x4: op = Pav_MRGHI; goto do_AvBin32x4;
- case Iop_InterleaveLO32x4: op = Pav_MRGLO; goto do_AvBin32x4;
+ case Iop_Narrow32x4: op = Pav_PACKUU; goto do_AvBin32x4;
+ case Iop_QNarrow32Uto16Ux8: op = Pav_QPACKUU; goto do_AvBin32x4;
+ case Iop_QNarrow32Sto16Sx8: op = Pav_QPACKSS; goto do_AvBin32x4;
+ case Iop_InterleaveHI32x4: op = Pav_MRGHI; goto do_AvBin32x4;
+ case Iop_InterleaveLO32x4: op = Pav_MRGLO; goto do_AvBin32x4;
case Iop_Add32x4: op = Pav_ADDU; goto do_AvBin32x4;
case Iop_QAdd32Ux4: op = Pav_QADDU; goto do_AvBin32x4;
case Iop_QAdd32Sx4: op = Pav_QADDS; goto do_AvBin32x4;
Modified: trunk/priv/host_x86_isel.c
===================================================================
--- trunk/priv/host_x86_isel.c 2011-06-07 21:28:38 UTC (rev 2158)
+++ trunk/priv/host_x86_isel.c 2011-06-15 15:09:37 UTC (rev 2159)
@@ -2386,12 +2386,12 @@
case Iop_QAdd16Ux4:
fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
- case Iop_QNarrow32Sx2:
- fn = (HWord)h_generic_calc_QNarrow32Sx2; goto binnish;
- case Iop_QNarrow16Sx4:
- fn = (HWord)h_generic_calc_QNarrow16Sx4; goto binnish;
- case Iop_QNarrow16Ux4:
- fn = (HWord)h_generic_calc_QNarrow16Ux4; goto binnish;
+ case Iop_QNarrow32Sto16Sx4:
+ fn = (HWord)h_generic_calc_QNarrow32Sto16Sx4; goto binnish;
+ case Iop_QNarrow16Sto8Sx8:
+ fn = (HWord)h_generic_calc_QNarrow16Sto8Sx8; goto binnish;
+ case Iop_QNarrow16Sto8Ux8:
+ fn = (HWord)h_generic_calc_QNarrow16Sto8Ux8; goto binnish;
case Iop_QSub8Sx8:
fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
@@ -3500,11 +3500,11 @@
return dst;
}
- case Iop_QNarrow32Sx4:
+ case Iop_QNarrow32Sto16Sx8:
op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
- case Iop_QNarrow16Sx8:
+ case Iop_QNarrow16Sto8Sx16:
op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
- case Iop_QNarrow16Ux8:
+ case Iop_QNarrow16Sto8Ux16:
op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
case Iop_InterleaveHI8x16:
Modified: trunk/priv/ir_defs.c
===================================================================
--- trunk/priv/ir_defs.c 2011-06-07 21:28:38 UTC (rev 2158)
+++ trunk/priv/ir_defs.c 2011-06-15 15:09:37 UTC (rev 2159)
@@ -506,9 +506,9 @@
case Iop_SarN8x8: vex_printf("SarN8x8"); return;
case Iop_SarN16x4: vex_printf("SarN16x4"); return;
case Iop_SarN32x2: vex_printf("SarN32x2"); return;
- case Iop_QNarrow16Ux4: vex_printf("QNarrow16Ux4"); return;
- case Iop_QNarrow16Sx4: vex_printf("QNarrow16Sx4"); return;
- case Iop_QNarrow32Sx2: vex_printf("QNarrow32Sx2"); return;
+ case Iop_QNarrow16Sto8Ux8: vex_printf("QNarrow16Sto8Ux8"); return;
+ case Iop_QNarrow16Sto8Sx8: vex_printf("QNarrow16Sto8Sx8"); return;
+ case Iop_QNarrow32Sto16Sx4: vex_printf("QNarrow32Sto16Sx4"); return;
case Iop_InterleaveHI8x8: vex_printf("InterleaveHI8x8"); return;
case Iop_InterleaveHI16x4: vex_printf("InterleaveHI16x4"); return;
case Iop_InterleaveHI32x2: vex_printf("InterleaveHI32x2"); return;
@@ -846,10 +846,10 @@
case Iop_Narrow16x8: vex_printf("Narrow16x8"); return;
case Iop_Narrow32x4: vex_printf("Narrow32x4"); return;
- case Iop_QNarrow16Ux8: vex_printf("QNarrow16Ux8"); return;
- case Iop_QNarrow32Ux4: vex_printf("QNarrow32Ux4"); return;
- case Iop_QNarrow16Sx8: vex_printf("QNarrow16Sx8"); return;
- case Iop_QNarrow32Sx4: vex_printf("QNarrow32Sx4"); return;
+ case Iop_QNarrow16Sto8Ux16: vex_printf("QNarrow16Sto8Ux16"); return;
+ case Iop_QNarrow32Uto16Ux8: vex_printf("QNarrow32Uto16Ux8"); return;
+ case Iop_QNarrow16Sto8Sx16: vex_printf("QNarrow16Sto8Sx16"); return;
+ case Iop_QNarrow32Sto16Sx8: vex_printf("QNarrow32Sto16Sx8"); return;
case Iop_Shorten16x8: vex_printf("Shorten16x8"); return;
case Iop_Shorten32x4: vex_printf("Shorten32x4"); return;
case Iop_Shorten64x2: vex_printf("Shorten64x2"); return;
@@ -2052,8 +2052,8 @@
case Iop_QAdd32Ux2: case Iop_QAdd64Ux1:
case Iop_PwAdd8x8: case Iop_PwAdd16x4: case Iop_PwAdd32x2:
case Iop_PwAdd32Fx2:
- case Iop_QNarrow32Sx2:
- case Iop_QNarrow16Sx4: case Iop_QNarrow16Ux4:
+ case Iop_QNarrow32Sto16Sx4:
+ case Iop_QNarrow16Sto8Sx8: case Iop_QNarrow16Sto8Ux8:
case Iop_Sub8x8: case Iop_Sub16x4: case Iop_Sub32x2:
case Iop_QSub8Sx8: case Iop_QSub16Sx4:
case Iop_QSub32Sx2: case Iop_QSub64Sx1:
@@ -2418,8 +2418,9 @@
case Iop_Sar8x16: case Iop_Sar16x8: case Iop_Sar32x4: case Iop_Sar64x2:
case Iop_Sal8x16: case Iop_Sal16x8: case Iop_Sal32x4: case Iop_Sal64x2:
case Iop_Rol8x16: case Iop_Rol16x8: case Iop_Rol32x4:
- case Iop_QNarrow16Ux8: case Iop_QNarrow32Ux4:
- case Iop_QNarrow16Sx8: case Iop_QNarrow32Sx4:
+ case Iop_QNarrow16Sto8Ux16:
+ case Iop_QNarrow16Sto8Sx16: case Iop_QNarrow32Sto16Sx8:
+ case Iop_QNarrow32Uto16Ux8:
case Iop_Narrow16x8: case Iop_Narrow32x4:
case Iop_InterleaveHI8x16: case Iop_InterleaveHI16x8:
case Iop_InterleaveHI32x4: case Iop_InterleaveHI64x2:
Modified: trunk/pub/libvex_ir.h
===================================================================
--- trunk/pub/libvex_ir.h 2011-06-07 21:28:38 UTC (rev 2158)
+++ trunk/pub/libvex_ir.h 2011-06-15 15:09:37 UTC (rev 2159)
@@ -896,10 +896,28 @@
Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1,
/* NARROWING -- narrow 2xI64 into 1xI64, hi half from left arg */
- Iop_QNarrow16Ux4,
- Iop_QNarrow16Sx4,
- Iop_QNarrow32Sx2,
+ /* For saturated narrowing, I believe there are 4 variants of
+ the basic arithmetic operation, depending on the signedness
+ of argument and result. The 16-to-8 bit cases below illustrate
+ the four variants:
+ QNarrow16Uto8U ( UShort x ) if (x >u 255) x = 255;
+ return x[7:0];
+
+ QNarrow16Sto8S ( Short x ) if (x <s -128) x = -128;
+ if (x >s 127) x = 127;
+ return x[7:0];
+
+ QNarrow16Uto8S ( UShort x ) if (x >u 127) x = 127;
+ return x[7:0];
+
+ QNarrow16Sto8U ( Short x ) if (x <s 0) x = 0;
+ if (x >s 255) x = 255;
+ return x[7:0];
+ */
+ Iop_QNarrow16Sto8Ux8,
+ Iop_QNarrow16Sto8Sx8, Iop_QNarrow32Sto16Sx4,
+
/* INTERLEAVING */
/* Interleave lanes from low or high halves of
operands. Most-significant result lane is from the left
@@ -1176,9 +1194,10 @@
Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2,
/* NARROWING -- narrow 2xV128 into 1xV128, hi half from left arg */
- /* Note: the 16{U,S} and 32{U,S} are the pre-narrow lane widths. */
- Iop_QNarrow16Ux8, Iop_QNarrow32Ux4,
- Iop_QNarrow16Sx8, Iop_QNarrow32Sx4,
+ /* See comments above w.r.t. U vs S issues in saturated narrowing. */
+ Iop_QNarrow16Sto8Ux16,
+ Iop_QNarrow16Sto8Sx16, Iop_QNarrow32Sto16Sx8,
+ Iop_QNarrow16Uto8Ux16, Iop_QNarrow32Uto16Ux8,
Iop_Narrow16x8, Iop_Narrow32x4,
/* Shortening V128->I64, lo half from each element */
Iop_Shorten16x8, Iop_Shorten32x4, Iop_Shorten64x2,
Modified: trunk/test_main.c
===================================================================
--- trunk/test_main.c 2011-06-07 21:28:38 UTC (rev 2158)
+++ trunk/test_main.c 2011-06-15 15:09:37 UTC (rev 2159)
@@ -1531,9 +1531,9 @@
IRAtom *at1, *at2, *at3;
IRAtom* (*pcast)( MCEnv*, IRAtom* );
switch (narrow_op) {
- case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
- case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
- case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
+ case Iop_QNarrow32Sto16Sx8: pcast = mkPCast32x4; break;
+ case Iop_QNarrow16Sto8Sx16: pcast = mkPCast16x8; break;
+ case Iop_QNarrow16Sto8Ux16: pcast = mkPCast16x8; break;
default: VG_(tool_panic)("vectorNarrowV128");
}
tl_assert(isShadowAtom(mce,vatom1));
@@ -1671,9 +1671,9 @@
case Iop_QAdd64Sx2:
return binary64Ix2(mce, vatom1, vatom2);
- case Iop_QNarrow32Sx4:
- case Iop_QNarrow16Sx8:
- case Iop_QNarrow16Ux8:
+ case Iop_QNarrow32Sto16Sx8:
+ case Iop_QNarrow16Sto8Sx16:
+ case Iop_QNarrow16Sto8Ux16:
return vectorNarrowV128(mce, op, vatom1, vatom2);
case Iop_Sub64Fx2:
|