|
From: <sv...@va...> - 2013-08-12 18:01:53
|
carll 2013-08-12 19:01:40 +0100 (Mon, 12 Aug 2013)
New Revision: 2740
Log:
Initial ISA 2.07 support for POWER8-tuned libc
The IBM Power ISA 2.07 has been published on power.org, and IBM's new POWER8
processor is under development to implement that ISA. This patch provides
initial VEX support for running Valgrind on POWER8 systems running a soon-to-be
released Linux distribution. This Linux distro will include a POWER8-tuned
libc that uses a subset of the new instructions from ISA 2.07. Since virtually
all applications link with libc, it would be impossible to run an application
under Valgrind on this distro without adding support for these new instructions
to Valgrind, so that's the intent of this patch. Note that applications built
on this distro will *not* employ new POWER8 instructions by default. There are
roughly 150 new instructions in the Power ISA 2.07, including hardware
transaction management (HTM). Support for these new instructions (modulo the
subset included in this bug) will be added to Valgrind in a phased approach,
similar to what we did for Power ISA 2.06.
Bugzilla 322294
Modified files:
trunk/priv/guest_ppc_toIR.c
trunk/priv/host_ppc_defs.c
trunk/priv/host_ppc_defs.h
trunk/priv/host_ppc_isel.c
trunk/priv/ir_defs.c
trunk/priv/main_main.c
trunk/pub/libvex.h
trunk/pub/libvex_ir.h
Modified: trunk/priv/guest_ppc_toIR.c (+149 -10)
===================================================================
--- trunk/priv/guest_ppc_toIR.c 2013-08-08 11:28:59 +01:00 (rev 2739)
+++ trunk/priv/guest_ppc_toIR.c 2013-08-12 19:01:40 +01:00 (rev 2740)
@@ -4933,11 +4933,17 @@
case 0x3E:
switch ((b1<<1) | b0) {
case 0x0: // std (Store DWord, PPC64 p580)
+ if (!mode64)
+ return False;
+
DIP("std r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
storeBE( mkexpr(EA), mkexpr(rS) );
break;
case 0x1: // stdu (Store DWord, Update, PPC64 p583)
+ if (!mode64)
+ return False;
+
DIP("stdu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
putIReg( rA_addr, mkexpr(EA) );
storeBE( mkexpr(EA), mkexpr(rS) );
@@ -6463,7 +6469,7 @@
}
/* not decodable */
return False;
-
+
/* XFX-Form */
case 0x153: // mfspr (Move from Special-Purpose Register, PPC32 p470)
@@ -6631,7 +6637,79 @@
return False;
}
break;
-
+
+ case 0x33: // mfvsrd
+ {
+ UChar XS = ifieldRegXS( theInstr );
+ UChar rA_addr = ifieldRegA(theInstr);
+ IRExpr * high64;
+ IRTemp vS = newTemp( Ity_V128 );
+ DIP("mfvsrd r%u,vsr%d\n", rA_addr, (UInt)XS);
+
+ /* XS = SX || S
+ * For SX=0, mfvsrd is treated as a Floating-Point
+ * instruction in terms of resource availability.
+ * For SX=1, mfvsrd is treated as a Vector instruction in
+ * terms of resource availability.
+ *NEED TO FIGURE OUT HOW TO IMPLEMENT THE RESOURCE AVAILABILITY PART
+ */
+ assign( vS, getVSReg( XS ) );
+ high64 = unop( Iop_V128HIto64, mkexpr( vS ) );
+ putIReg( rA_addr, (mode64) ? high64 :
+ unop( Iop_64to32, high64 ) );
+ break;
+ }
+
+ case 0xB3: // mtvsrd
+ {
+ UChar XT = ifieldRegXT( theInstr );
+ UChar rA_addr = ifieldRegA(theInstr);
+ IRTemp rA = newTemp(ty);
+ DIP("mtvsrd vsr%d,r%u\n", (UInt)XT, rA_addr);
+ /* XT = TX || T
+ * For TX=0, mtvsrd is treated as a Floating-Point
+ * instruction in terms of resource availability.
+ * For TX=1, mtvsrd is treated as a Vector instruction in
+ * terms of resource availability.
+ * NEED TO FIGURE OUT HOW TO IMPLEMENT THE RESOURCE AVAILABILITY PART
+ */
+ assign( rA, getIReg(rA_addr) );
+
+ if (mode64)
+ putVSReg( XT, binop( Iop_64HLtoV128, mkexpr( rA ), mkU64( 0 ) ) );
+ else
+ putVSReg( XT, binop( Iop_64HLtoV128,
+ binop( Iop_32HLto64,
+ mkU32( 0 ),
+ mkexpr( rA ) ),
+ mkU64( 0 ) ) );
+ break;
+ }
+
+ case 0xD3: // mtvsrwa
+ {
+ UChar XT = ifieldRegXT( theInstr );
+ UChar rA_addr = ifieldRegA(theInstr);
+ IRTemp rA = newTemp( Ity_I32 );
+ DIP("mtvsrwa vsr%d,r%u\n", (UInt)XT, rA_addr);
+ /* XT = TX || T
+ * For TX=0, mtvsrwa is treated as a Floating-Point
+ * instruction in terms of resource availability.
+ * For TX=1, mtvsrwa is treated as a Vector instruction in
+ * terms of resource availability.
+ * NEED TO FIGURE OUT HOW TO IMPLEMENT THE RESOURCE AVAILABILITY PART
+ */
+ if (mode64)
+ assign( rA, unop( Iop_64to32, getIReg( rA_addr ) ) );
+ else
+ assign( rA, getIReg(rA_addr) );
+
+ putVSReg( XT, binop( Iop_64HLtoV128,
+ unop( Iop_32Sto64, mkexpr( rA ) ),
+ mkU64( 0 ) ) );
+ break;
+ }
+
default:
vex_printf("dis_proc_ctl(ppc)(opc2)\n");
return False;
@@ -11692,7 +11770,7 @@
/* Create and assign temps only as needed for the given instruction. */
switch (opc2) {
// scalar double-precision floating point argument
- case 0x2B0: case 0x0b0: case 0x290: case 0x212: case 0x090:
+ case 0x2B0: case 0x0b0: case 0x290: case 0x212: case 0x216: case 0x090:
xB = newTemp(Ity_F64);
assign( xB,
unop( Iop_ReinterpI64asF64,
@@ -11734,6 +11812,11 @@
assign( xB,
unop( Iop_64HIto32, unop( Iop_V128HIto64, getVSReg( XB ) ) ) );
break;
+ case 0x296: // xscvspdpn (non-signalling version of xscvspdp)
+ xB = newTemp(Ity_I32);
+ assign( xB,
+ unop( Iop_64HIto32, unop( Iop_V128HIto64, getVSReg( XB ) ) ) );
+ break;
/* Certain instructions have their complete implementation in the main switch statement
* that follows this one; thus we have a "do nothing" case for those instructions here.
@@ -11881,6 +11964,18 @@
mkU32( 0 ) ),
mkU64( 0ULL ) ) );
break;
+ case 0x216: /* xscvdpspn (VSX Scalar Convert Double-Precision to
+ Single-Precision format Non-signalling) */
+ DIP("xscvdpspn v%u,v%u\n", (UInt)XT, (UInt)XB);
+ putVSReg( XT,
+ binop( Iop_64HLtoV128,
+ binop( Iop_32HLto64,
+ unop( Iop_ReinterpF32asI32,
+ unop( Iop_TruncF64asF32,
+ mkexpr( xB ) ) ),
+ mkU32( 0 ) ),
+ mkU64( 0ULL ) ) );
+ break;
case 0x090: // xscvdpuxws (VSX Scalar truncate Double-Precision to integer
// and Convert to Unsigned Integer Word format with Saturate)
DIP("xscvdpuxws v%u,v%u\n", (UInt)XT, (UInt)XB);
@@ -11902,6 +11997,15 @@
unop( Iop_ReinterpI32asF32, mkexpr( xB ) ) ) ),
mkU64( 0ULL ) ) );
break;
+ case 0x296: // xscvspdpn (VSX Scalar Convert Single-Precision to Double-Precision format Non signaling)
+ DIP("xscvspdpn v%u,v%u\n", (UInt)XT, (UInt)XB);
+ putVSReg( XT,
+ binop( Iop_64HLtoV128,
+ unop( Iop_ReinterpF64asI64,
+ unop( Iop_F32toF64,
+ unop( Iop_ReinterpI32asF32, mkexpr( xB ) ) ) ),
+ mkU64( 0ULL ) ) );
+ break;
case 0x312: // xvcvdpsp (VSX Vector round Double-Precision to single-precision
// and Convert to Single-Precision format)
DIP("xvcvdpsp v%u,v%u\n", (UInt)XT, (UInt)XB);
@@ -14627,6 +14731,11 @@
putVReg( vD_addr, binop(Iop_Add32x4, mkexpr(vA), mkexpr(vB)) );
break;
+ case 0x0C0: // vaddudm (Add Unsigned Double Word Modulo)
+ DIP("vaddudm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Add64x2, mkexpr(vA), mkexpr(vB)) );
+ break;
+
case 0x200: // vaddubs (Add Unsigned Byte Saturate, AV p142)
DIP("vaddubs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
putVReg( vD_addr, binop(Iop_QAdd8Ux16, mkexpr(vA), mkexpr(vB)) );
@@ -15899,6 +16008,12 @@
return True;
}
+ case 0x44E: // vpkudum (Pack Unsigned Double Word Unsigned Modulo)
+ DIP("vpkudum v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_NarrowBin64to32x4, mkexpr(vA), mkexpr(vB)) );
+ return True;
+
default:
break; // Fall through...
}
@@ -16431,6 +16546,7 @@
{ 0x1f4, "xvtdivdp" },
{ 0x208, "xxland" },
{ 0x212, "xscvdpsp" },
+ { 0x216, "xscvdpspn" },
{ 0x228, "xxlandc" },
{ 0x248 , "xxlor" },
{ 0x268, "xxlxor" },
@@ -16439,6 +16555,7 @@
{ 0x288, "xxlnor" },
{ 0x290, "xscvdpuxds" },
{ 0x292, "xscvspdp" },
+ { 0x296, "xscvspdpn" },
{ 0x2a0, "xsmindp" },
{ 0x2a4, "xsnmaddmdp" },
{ 0x2b0, "xscvdpsxds" },
@@ -16487,8 +16604,9 @@
{ 0x3f0, "xvcvsxddp" },
{ 0x3f2, "xvnegdp" }
};
-#define VSX_ALL_LEN 135
+#define VSX_ALL_LEN (sizeof vsx_all / sizeof *vsx_all)
+
// ATTENTION: This search function assumes vsx_all array is sorted.
static Int findVSXextOpCode(UInt opcode)
{
@@ -16565,6 +16683,7 @@
Bool allow_GX = False;
Bool allow_VX = False; // Equates to "supports Power ISA 2.06
Bool allow_DFP = False;
+ Bool allow_isa_2_07 = False;
UInt hwcaps = archinfo->hwcaps;
Long delta;
@@ -16576,6 +16695,7 @@
allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC64_GX));
allow_VX = (0 != (hwcaps & VEX_HWCAPS_PPC64_VX));
allow_DFP = (0 != (hwcaps & VEX_HWCAPS_PPC64_DFP));
+ allow_isa_2_07 = (0 != (hwcaps & VEX_HWCAPS_PPC64_ISA2_07));
} else {
allow_F = (0 != (hwcaps & VEX_HWCAPS_PPC32_F));
allow_V = (0 != (hwcaps & VEX_HWCAPS_PPC32_V));
@@ -16583,6 +16703,7 @@
allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC32_GX));
allow_VX = (0 != (hwcaps & VEX_HWCAPS_PPC32_VX));
allow_DFP = (0 != (hwcaps & VEX_HWCAPS_PPC32_DFP));
+ allow_isa_2_07 = (0 != (hwcaps & VEX_HWCAPS_PPC32_ISA2_07));
}
/* The running delta */
@@ -17025,8 +17146,9 @@
case 0x2B0: case 0x2F0: // xscvdpsxds, xscvsxddp
case 0x1b0: case 0x130: // xvcvdpsxws, xvcvspsxws
case 0x0b0: case 0x290: // xscvdpsxws, xscvdpuxds
- case 0x212: // xscvdpsp
- case 0x292: case 0x312: // xscvspdp, xvcvdpsp
+ case 0x212: case 0x216: // xscvdpsp, xscvdpspn
+ case 0x292: case 0x296: // xscvspdp, xscvspdpn
+ case 0x312: // xvcvdpsp
case 0x390: case 0x190: // xvcvdpuxds, xvcvdpuxws
case 0x3B0: case 0x310: // xvcvdpsxds, xvcvspuxds
case 0x392: case 0x330: // xvcvspdp, xvcvspsxds
@@ -17070,7 +17192,6 @@
/* 64bit Integer Stores */
case 0x3E: // std, stdu
- if (!mode64) goto decode_failure;
if (dis_int_store( theInstr, abiinfo )) goto decode_success;
goto decode_failure;
@@ -17105,7 +17226,7 @@
if (!allow_GX) goto decode_noGX;
if (dis_fp_arith(theInstr)) goto decode_success;
goto decode_failure;
-
+
default:
break; // Fall through
}
@@ -17455,6 +17576,8 @@
goto decode_failure;
/* Processor Control Instructions */
+ case 0x33: // mfvsrd
+ case 0xB3: case 0xD3: // mtvsrd, mtvsrwa
case 0x200: case 0x013: case 0x153: // mcrxr, mfcr, mfspr
case 0x173: case 0x090: case 0x1D3: // mftb, mtcrf, mtspr
if (dis_proc_ctl( abiinfo, theInstr )) goto decode_success;
@@ -17662,6 +17785,11 @@
if (dis_av_arith( theInstr )) goto decode_success;
goto decode_failure;
+ case 0x0C0: // vaddudm
+ if (!allow_isa_2_07) goto decode_noP8;
+ if (dis_av_arith( theInstr )) goto decode_success;
+ goto decode_failure;
+
/* AV Rotate, Shift */
case 0x004: case 0x044: case 0x084: // vrlb, vrlh, vrlw
case 0x104: case 0x144: case 0x184: // vslb, vslh, vslw
@@ -17725,6 +17853,11 @@
if (dis_av_pack( theInstr )) goto decode_success;
goto decode_failure;
+ case 0x44E: // vpkudum
+ if (!allow_isa_2_07) goto decode_noP8;
+ if (dis_av_pack( theInstr )) goto decode_success;
+ goto decode_failure;
+
default:
break; // Fall through...
}
@@ -17782,6 +17915,11 @@
vex_printf("disInstr(ppc): "
"declined to decode a Decimal Floating Point insn.\n");
goto decode_failure;
+ decode_noP8:
+ vassert(!allow_isa_2_07);
+ vex_printf("disInstr(ppc): "
+ "declined to decode a Power 8 insn.\n");
+ goto decode_failure;
decode_failure:
@@ -17870,10 +18008,11 @@
/* do some sanity checks */
mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
| VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX
- | VEX_HWCAPS_PPC32_DFP;
+ | VEX_HWCAPS_PPC32_DFP | VEX_HWCAPS_PPC32_ISA2_07;
mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
- | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP;
+ | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP
+ | VEX_HWCAPS_PPC64_ISA2_07;
if (mode64) {
vassert((hwcaps_guest & mask32) == 0);
Modified: trunk/priv/ir_defs.c (+2 -0)
===================================================================
--- trunk/priv/ir_defs.c 2013-08-08 11:28:59 +01:00 (rev 2739)
+++ trunk/priv/ir_defs.c 2013-08-12 19:01:40 +01:00 (rev 2740)
@@ -528,6 +528,7 @@
case Iop_QNarrowBin32Sto16Sx4: vex_printf("QNarrowBin32Sto16Sx4"); return;
case Iop_NarrowBin16to8x8: vex_printf("NarrowBin16to8x8"); return;
case Iop_NarrowBin32to16x4: vex_printf("NarrowBin32to16x4"); return;
+ case Iop_NarrowBin64to32x4: vex_printf("NarrowBin64to32x4"); return;
case Iop_InterleaveHI8x8: vex_printf("InterleaveHI8x8"); return;
case Iop_InterleaveHI16x4: vex_printf("InterleaveHI16x4"); return;
case Iop_InterleaveHI32x2: vex_printf("InterleaveHI32x2"); return;
@@ -2806,6 +2807,7 @@
case Iop_QNarrowBin16Sto8Sx16: case Iop_QNarrowBin32Sto16Sx8:
case Iop_QNarrowBin16Uto8Ux16: case Iop_QNarrowBin32Uto16Ux8:
case Iop_NarrowBin16to8x16: case Iop_NarrowBin32to16x8:
+ case Iop_NarrowBin64to32x4:
case Iop_InterleaveHI8x16: case Iop_InterleaveHI16x8:
case Iop_InterleaveHI32x4: case Iop_InterleaveHI64x2:
case Iop_InterleaveLO8x16: case Iop_InterleaveLO16x8:
Modified: trunk/priv/host_ppc_defs.h (+9 -0)
===================================================================
--- trunk/priv/host_ppc_defs.h 2013-08-08 11:28:59 +01:00 (rev 2739)
+++ trunk/priv/host_ppc_defs.h 2013-08-12 19:01:40 +01:00 (rev 2740)
@@ -492,6 +492,7 @@
Pin_AvBin8x16, /* AV binary, 8x4 */
Pin_AvBin16x8, /* AV binary, 16x4 */
Pin_AvBin32x4, /* AV binary, 32x4 */
+ Pin_AvBin64x2, /* AV binary, 64x2 */
Pin_AvBin32Fx4, /* AV FP binary, 32Fx4 */
Pin_AvUn32Fx4, /* AV FP unary, 32Fx4 */
@@ -795,7 +796,14 @@
HReg srcL;
HReg srcR;
} AvBin32x4;
+ /* Can only be generated for CPUs capable of ISA 2.07 or above */
struct {
+ PPCAvOp op;
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } AvBin64x2;
+ struct {
PPCAvFpOp op;
HReg dst;
HReg srcL;
@@ -1013,6 +1021,7 @@
extern PPCInstr* PPCInstr_AvBin8x16 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
extern PPCInstr* PPCInstr_AvBin16x8 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
extern PPCInstr* PPCInstr_AvBin32x4 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvBin64x2 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
extern PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvFpOp op, HReg dst, HReg srcL, HReg srcR );
extern PPCInstr* PPCInstr_AvUn32Fx4 ( PPCAvFpOp op, HReg dst, HReg src );
extern PPCInstr* PPCInstr_AvPerm ( HReg dst, HReg srcL, HReg srcR, HReg ctl );
Modified: trunk/priv/host_ppc_defs.c (+49 -2)
===================================================================
--- trunk/priv/host_ppc_defs.c 2013-08-08 11:28:59 +01:00 (rev 2739)
+++ trunk/priv/host_ppc_defs.c 2013-08-12 19:01:40 +01:00 (rev 2740)
@@ -674,7 +674,7 @@
case Pav_UNPCKLPIX: return "vupklpx";
/* Integer binary */
- case Pav_ADDU: return "vaddu_m"; // b,h,w
+ case Pav_ADDU: return "vaddu_m"; // b,h,w,dw
case Pav_QADDU: return "vaddu_s"; // b,h,w
case Pav_QADDS: return "vadds_s"; // b,h,w
@@ -708,7 +708,7 @@
case Pav_ROTL: return "vrl"; // b,h,w
/* Pack */
- case Pav_PACKUU: return "vpku_um"; // h,w
+ case Pav_PACKUU: return "vpku_um"; // h,w,dw
case Pav_QPACKUU: return "vpku_us"; // h,w
case Pav_QPACKSU: return "vpks_us"; // h,w
case Pav_QPACKSS: return "vpks_ss"; // h,w
@@ -1348,6 +1348,17 @@
i->Pin.AvBin32x4.srcR = srcR;
return i;
}
+PPCInstr* PPCInstr_AvBin64x2 ( PPCAvOp op, HReg dst,
+ HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvBin64x2;
+ i->Pin.AvBin64x2.op = op;
+ i->Pin.AvBin64x2.dst = dst;
+ i->Pin.AvBin64x2.srcL = srcL;
+ i->Pin.AvBin64x2.srcR = srcR;
+ return i;
+}
+
PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvFpOp op, HReg dst,
HReg srcL, HReg srcR ) {
PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
@@ -1883,6 +1894,14 @@
vex_printf(",");
ppHRegPPC(i->Pin.AvBin32x4.srcR);
return;
+ case Pin_AvBin64x2:
+ vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvBin64x2.op));
+ ppHRegPPC(i->Pin.AvBin64x2.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin64x2.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin64x2.srcR);
+ return;
case Pin_AvBin32Fx4:
vex_printf("%s ", showPPCAvFpOp(i->Pin.AvBin32Fx4.op));
ppHRegPPC(i->Pin.AvBin32Fx4.dst);
@@ -2364,6 +2383,11 @@
addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcL);
addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcR);
return;
+ case Pin_AvBin64x2:
+ addHRegUse(u, HRmWrite, i->Pin.AvBin64x2.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvBin64x2.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AvBin64x2.srcR);
+ return;
case Pin_AvBin32Fx4:
addHRegUse(u, HRmWrite, i->Pin.AvBin32Fx4.dst);
addHRegUse(u, HRmRead, i->Pin.AvBin32Fx4.srcL);
@@ -2670,6 +2694,11 @@
mapReg(m, &i->Pin.AvBin32x4.srcL);
mapReg(m, &i->Pin.AvBin32x4.srcR);
return;
+ case Pin_AvBin64x2:
+ mapReg(m, &i->Pin.AvBin64x2.dst);
+ mapReg(m, &i->Pin.AvBin64x2.srcL);
+ mapReg(m, &i->Pin.AvBin64x2.srcR);
+ return;
case Pin_AvBin32Fx4:
mapReg(m, &i->Pin.AvBin32Fx4.dst);
mapReg(m, &i->Pin.AvBin32Fx4.srcL);
@@ -4785,6 +4814,24 @@
goto done;
}
+ case Pin_AvBin64x2: {
+ UInt v_dst = vregNo(i->Pin.AvBin64x2.dst);
+ UInt v_srcL = vregNo(i->Pin.AvBin64x2.srcL);
+ UInt v_srcR = vregNo(i->Pin.AvBin64x2.srcR);
+ UInt opc2;
+ switch (i->Pin.AvBin64x2.op) {
+ case Pav_ADDU: opc2 = 192; break; // vaddudm vector double add
+ case Pav_PACKUU: opc2 = 1102; break; // vpkudum
+ // FIXME: We currently don't have a vector compare equal double word, so it's a hack
+ // to use vcmpequw, but it works.
+ case Pav_CMPEQU: opc2 = 134; break; // vcmpequw
+ default:
+ goto bad;
+ }
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+ goto done;
+ }
+
case Pin_AvBin32Fx4: {
UInt v_dst = vregNo(i->Pin.AvBin32Fx4.dst);
UInt v_srcL = vregNo(i->Pin.AvBin32Fx4.srcL);
Modified: trunk/priv/main_main.c (+7 -0)
===================================================================
--- trunk/priv/main_main.c 2013-08-08 11:28:59 +01:00 (rev 2739)
+++ trunk/priv/main_main.c 2013-08-12 19:01:40 +01:00 (rev 2740)
@@ -1294,6 +1294,7 @@
const UInt GX = VEX_HWCAPS_PPC32_GX;
const UInt VX = VEX_HWCAPS_PPC32_VX;
const UInt DFP = VEX_HWCAPS_PPC32_DFP;
+ const UInt ISA2_07 = VEX_HWCAPS_PPC32_ISA2_07;
UInt c = hwcaps;
if (c == 0) return "ppc32-int";
if (c == F) return "ppc32-int-flt";
@@ -1306,6 +1307,9 @@
if (c == (F|V|FX|GX)) return "ppc32-int-flt-vmx-FX-GX";
if (c == (F|V|FX|GX|DFP)) return "ppc32-int-flt-vmx-FX-GX-DFP";
if (c == (F|V|FX|GX|VX|DFP)) return "ppc32-int-flt-vmx-FX-GX-VX-DFP";
+ if (c == (F|V|FX|GX|VX|DFP|ISA2_07))
+ return "ppc32-int-flt-vmx-FX-GX-VX-DFP-ISA2_07";
+
return NULL;
}
@@ -1318,6 +1322,7 @@
const UInt GX = VEX_HWCAPS_PPC64_GX;
const UInt VX = VEX_HWCAPS_PPC64_VX;
const UInt DFP = VEX_HWCAPS_PPC64_DFP;
+ const UInt ISA2_07 = VEX_HWCAPS_PPC64_ISA2_07;
UInt c = hwcaps;
if (c == 0) return "ppc64-int-flt";
if (c == FX) return "ppc64-int-flt-FX";
@@ -1329,6 +1334,8 @@
if (c == (V|FX|GX)) return "ppc64-int-flt-vmx-FX-GX";
if (c == (V|FX|GX|DFP)) return "ppc64-int-flt-vmx-FX-GX-DFP";
if (c == (V|FX|GX|VX|DFP)) return "ppc64-int-flt-vmx-FX-GX-VX-DFP";
+ if (c == (V|FX|GX|VX|DFP|ISA2_07))
+ return "ppc64-int-flt-vmx-FX-GX-VX-DFP-ISA2_07";
return NULL;
}
Modified: trunk/priv/host_ppc_isel.c (+24 -2)
===================================================================
--- trunk/priv/host_ppc_isel.c 2013-08-08 11:28:59 +01:00 (rev 2739)
+++ trunk/priv/host_ppc_isel.c 2013-08-12 19:01:40 +01:00 (rev 2740)
@@ -4781,6 +4781,16 @@
return dst;
}
+ case Iop_CmpNEZ64x2: {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg zero = newVRegV(env);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
+ addInstr(env, PPCInstr_AvBin64x2(Pav_CMPEQU, dst, arg, zero));
+ addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
+ return dst;
+ }
+
case Iop_Recip32Fx4: fpop = Pavfp_RCPF; goto do_32Fx4_unary;
case Iop_RSqrt32Fx4: fpop = Pavfp_RSQRTF; goto do_32Fx4_unary;
case Iop_I32UtoFx4: fpop = Pavfp_CVTU2F; goto do_32Fx4_unary;
@@ -5045,6 +5055,16 @@
return dst;
}
+ case Iop_NarrowBin64to32x4: op = Pav_PACKUU; goto do_AvBin64x2;
+ case Iop_Add64x2: op = Pav_ADDU; goto do_AvBin64x2;
+ do_AvBin64x2: {
+ HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPCInstr_AvBin64x2(op, dst, arg1, arg2));
+ return dst;
+ }
+
case Iop_ShlN8x16: op = Pav_SHL; goto do_AvShift8x16;
case Iop_SarN8x16: op = Pav_SAR; goto do_AvShift8x16;
do_AvShift8x16: {
@@ -5779,10 +5799,12 @@
/* do some sanity checks */
mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
| VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX
- | VEX_HWCAPS_PPC32_DFP;
+ | VEX_HWCAPS_PPC32_DFP | VEX_HWCAPS_PPC32_ISA2_07;
+
mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
- | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP;
+ | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP
+ | VEX_HWCAPS_PPC64_ISA2_07;
if (mode64) {
vassert((hwcaps_host & mask32) == 0);
Modified: trunk/pub/libvex_ir.h (+1 -0)
===================================================================
--- trunk/pub/libvex_ir.h 2013-08-08 11:28:59 +01:00 (rev 2739)
+++ trunk/pub/libvex_ir.h 2013-08-12 19:01:40 +01:00 (rev 2740)
@@ -1467,6 +1467,7 @@
Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8,
Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8,
Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8,
+ Iop_NarrowBin64to32x4,
/* NARROWING (unary) -- narrow V128 into I64 */
Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,
Modified: trunk/pub/libvex.h (+2 -0)
===================================================================
--- trunk/pub/libvex.h 2013-08-08 11:28:59 +01:00 (rev 2739)
+++ trunk/pub/libvex.h 2013-08-12 19:01:40 +01:00 (rev 2740)
@@ -95,6 +95,7 @@
(fres,frsqrte,fsel,stfiwx) */
#define VEX_HWCAPS_PPC32_VX (1<<12) /* Vector-scalar floating-point (VSX); implies ISA 2.06 or higher */
#define VEX_HWCAPS_PPC32_DFP (1<<17) /* Decimal Floating Point (DFP) -- e.g., dadd */
+#define VEX_HWCAPS_PPC32_ISA2_07 (1<<19) /* ISA 2.07 -- e.g., mtvsrd */
/* ppc64: baseline capability is integer and basic FP insns */
#define VEX_HWCAPS_PPC64_V (1<<13) /* Altivec (VMX) */
@@ -103,6 +104,7 @@
(fres,frsqrte,fsel,stfiwx) */
#define VEX_HWCAPS_PPC64_VX (1<<16) /* Vector-scalar floating-point (VSX); implies ISA 2.06 or higher */
#define VEX_HWCAPS_PPC64_DFP (1<<18) /* Decimal Floating Point (DFP) -- e.g., dadd */
+#define VEX_HWCAPS_PPC64_ISA2_07 (1<<20) /* ISA 2.07 -- e.g., mtvsrd */
/* s390x: Hardware capability encoding
|