From: <sv...@va...> - 2005-11-08 16:23:13
Author: cerion
Date: 2005-11-08 16:23:07 +0000 (Tue, 08 Nov 2005)
New Revision: 1447
Log:
Frontend:
added remaining integer AltiVec insns (phew!)
- vsum4ubs, vsum4sbs, vsum4shs, vsum2sws, vsumsws
- vmsummbm, vmsumuhs, vmsumshs
various helpers to construct IR
- expand8x16*: sign/zero-extend V128_8x16 lanes => 2x V128_16x8
- breakV128to4x64*: break V128 to 4xI32's, sign/zero-extend to I64's
- mkQNarrow64to32*: un/signed saturating narrow 64 to 32
- mkV128from4x64*: narrow 4xI64's to 4xI32's, combine to V128_32x4
(a scalar sketch of the expand/narrow tricks follows the log)
Backend:
implemented Iop_Add64 (see the carry-chain sketch below)
- added PPC32Instr_AddSubC32: 32-bit add/sub with read/write carry
implemented 64-bit Iex_Const
implemented Iop_32Sto64
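
For reference, a minimal scalar sketch -- plain C with illustrative names,
not VEX code -- of the two tricks the new helpers lean on: widening a lane
via an even-lanes multiply against a vector of 1s, and the saturating
64->32 narrow used by mkQNarrow64Sto32:

   #include <stdint.h>

   /* expand8Ux16 in miniature: MullEven8Ux16 against a vector of 1s
      widens each even 8-bit lane to 16 bits; shifting the whole vector
      left by 8 first does the same for the odd lanes. */
   static uint16_t widen_even_lane ( uint8_t lane )
   {
      return (uint16_t)(1 * lane);   /* widening multiply by 1 */
   }

   /* mkQNarrow64Sto32 in miniature: lo32 is usable iff hi32 is just the
      sign-replication of lo32; otherwise saturate by the sign of hi32,
      since 0x7FFFFFFF + (hi32 >>u 31) is 0x7FFFFFFF or 0x80000000. */
   static uint32_t qnarrow64Sto32 ( int64_t x )
   {
      uint32_t lo32 = (uint32_t)x;
      uint32_t hi32 = (uint32_t)((uint64_t)x >> 32);
      if (hi32 == (uint32_t)((int32_t)lo32 >> 31))
         return lo32;                      /* within signed-32 range */
      return 0x7FFFFFFFu + (hi32 >> 31);   /* saturate by sign */
   }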
Modified:
trunk/priv/guest-ppc32/toIR.c
trunk/priv/host-ppc32/hdefs.c
trunk/priv/host-ppc32/hdefs.h
trunk/priv/host-ppc32/isel.c
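
Similarly illustrative only -- a C model of what the new Pin_AddSubC32
pair computes when isel splits Iop_Add64 into addc (low word, sets the
carry) followed by adde (high word, consumes the carry):

   #include <stdint.h>

   static void add64_via_carry_chain ( uint32_t xHi, uint32_t xLo,
                                       uint32_t yHi, uint32_t yLo,
                                       uint32_t* tHi, uint32_t* tLo )
   {
      *tLo = xLo + yLo;                       /* addc: low add, sets carry */
      uint32_t carry = (*tLo < xLo) ? 1 : 0;  /* carry out of the low add  */
      *tHi = xHi + yHi + carry;               /* adde: high add + carry    */
   }
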
Modified: trunk/priv/guest-ppc32/toIR.c
===================================================================
--- trunk/priv/guest-ppc32/toIR.c 2005-11-07 15:37:24 UTC (rev 1446)
+++ trunk/priv/guest-ppc32/toIR.c 2005-11-08 16:23:07 UTC (rev 1447)
@@ -414,6 +414,11 @@
return IRExpr_Const(IRConst_U32(i));
}
 
+static IRExpr* mkU64 ( ULong i )
+{
+ return IRExpr_Const(IRConst_U64(i));
+}
+
static IRExpr* loadBE ( IRType ty, IRExpr* data )
{
return IRExpr_Load(Iend_BE,ty,data);
@@ -437,6 +442,206 @@
unop(Iop_1Uto32, arg2)));
}
 
+/* expand V128_8Ux16 to 2x V128_16Ux8's */
+static void expand8Ux16( IRExpr* vIn, /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd )
+{
+ IRTemp ones8x16 = newTemp(Ity_V128);
+
+ vassert(typeOfIRExpr(irbb->tyenv, vIn) == Ity_V128);
+ vassert(vEvn && *vEvn == IRTemp_INVALID);
+ vassert(vOdd && *vOdd == IRTemp_INVALID);
+ *vEvn = newTemp(Ity_V128);
+ *vOdd = newTemp(Ity_V128);
+
+ assign( ones8x16, unop(Iop_Dup8x16, mkU8(0x1)) );
+ assign( *vEvn, binop(Iop_MullEven8Ux16, mkexpr(ones8x16), vIn) );
+ assign( *vOdd, binop(Iop_MullEven8Ux16, mkexpr(ones8x16),
+ binop(Iop_ShlV128, vIn, mkU8(8))) );
+}
+
+/* expand V128_8Sx16 to 2x V128_16Sx8's */
+static void expand8Sx16( IRExpr* vIn, /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd )
+{
+ IRTemp ones8x16 = newTemp(Ity_V128);
+
+ vassert(typeOfIRExpr(irbb->tyenv, vIn) == Ity_V128);
+ vassert(vEvn && *vEvn == IRTemp_INVALID);
+ vassert(vOdd && *vOdd == IRTemp_INVALID);
+ *vEvn = newTemp(Ity_V128);
+ *vOdd = newTemp(Ity_V128);
+
+ assign( ones8x16, unop(Iop_Dup8x16, mkU8(0x1)) );
+ assign( *vEvn, binop(Iop_MullEven8Sx16, mkexpr(ones8x16), vIn) );
+ assign( *vOdd, binop(Iop_MullEven8Sx16, mkexpr(ones8x16),
+ binop(Iop_ShlV128, vIn, mkU8(8))) );
+}
+
+/* expand V128_16Ux8 to 2x V128_32Ux4's */
+static void expand16Ux8( IRExpr* vIn, /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd )
+{
+ IRTemp ones16x8 = newTemp(Ity_V128);
+
+ vassert(typeOfIRExpr(irbb->tyenv, vIn) == Ity_V128);
+ vassert(vEvn && *vEvn == IRTemp_INVALID);
+ vassert(vOdd && *vOdd == IRTemp_INVALID);
+ *vEvn = newTemp(Ity_V128);
+ *vOdd = newTemp(Ity_V128);
+
+ assign( ones16x8, unop(Iop_Dup16x8, mkU16(0x1)) );
+ assign( *vEvn, binop(Iop_MullEven16Ux8, mkexpr(ones16x8), vIn) );
+ assign( *vOdd, binop(Iop_MullEven16Ux8, mkexpr(ones16x8),
+ binop(Iop_ShlV128, vIn, mkU8(16))) );
+}
+
+/* expand V128_16Sx8 to 2x V128_32Sx4's */
+static void expand16Sx8( IRExpr* vIn, /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd )
+{
+ IRTemp ones16x8 = newTemp(Ity_V128);
+
+ vassert(typeOfIRExpr(irbb->tyenv, vIn) == Ity_V128);
+ vassert(vEvn && *vEvn == IRTemp_INVALID);
+ vassert(vOdd && *vOdd == IRTemp_INVALID);
+ *vEvn = newTemp(Ity_V128);
+ *vOdd = newTemp(Ity_V128);
+
+ assign( ones16x8, unop(Iop_Dup16x8, mkU16(0x1)) );
+ assign( *vEvn, binop(Iop_MullEven16Sx8, mkexpr(ones16x8), vIn) );
+ assign( *vOdd, binop(Iop_MullEven16Sx8, mkexpr(ones16x8),
+ binop(Iop_ShlV128, vIn, mkU8(16))) );
+}
+
+/* break V128 to 4xI32's, then sign-extend to I64's */
+static void breakV128to4x64S( IRExpr* t128,
+ /*OUTs*/
+ IRTemp* t3, IRTemp* t2,
+ IRTemp* t1, IRTemp* t0 )
+{
+ IRTemp hi64 = newTemp(Ity_I64);
+ IRTemp lo64 = newTemp(Ity_I64);
+
+ vassert(typeOfIRExpr(irbb->tyenv, t128) == Ity_V128);
+ vassert(t0 && *t0 == IRTemp_INVALID);
+ vassert(t1 && *t1 == IRTemp_INVALID);
+ vassert(t2 && *t2 == IRTemp_INVALID);
+ vassert(t3 && *t3 == IRTemp_INVALID);
+ *t0 = newTemp(Ity_I64);
+ *t1 = newTemp(Ity_I64);
+ *t2 = newTemp(Ity_I64);
+ *t3 = newTemp(Ity_I64);
+
+ assign( hi64, unop(Iop_V128HIto64, t128) );
+ assign( lo64, unop(Iop_V128to64, t128) );
+ assign( *t3, unop(Iop_32Sto64, unop(Iop_64HIto32, mkexpr(hi64))) );
+ assign( *t2, unop(Iop_32Sto64, unop(Iop_64to32, mkexpr(hi64))) );
+ assign( *t1, unop(Iop_32Sto64, unop(Iop_64HIto32, mkexpr(lo64))) );
+ assign( *t0, unop(Iop_32Sto64, unop(Iop_64to32, mkexpr(lo64))) );
+}
+
+/* break V128 to 4xI32's, then zero-extend to I64's */
+static void breakV128to4x64U ( IRExpr* t128,
+ /*OUTs*/
+ IRTemp* t3, IRTemp* t2,
+ IRTemp* t1, IRTemp* t0 )
+{
+ IRTemp hi64 = newTemp(Ity_I64);
+ IRTemp lo64 = newTemp(Ity_I64);
+
+ vassert(typeOfIRExpr(irbb->tyenv, t128) == Ity_V128);
+ vassert(t0 && *t0 == IRTemp_INVALID);
+ vassert(t1 && *t1 == IRTemp_INVALID);
+ vassert(t2 && *t2 == IRTemp_INVALID);
+ vassert(t3 && *t3 == IRTemp_INVALID);
+ *t0 = newTemp(Ity_I64);
+ *t1 = newTemp(Ity_I64);
+ *t2 = newTemp(Ity_I64);
+ *t3 = newTemp(Ity_I64);
+
+ assign( hi64, unop(Iop_V128HIto64, t128) );
+ assign( lo64, unop(Iop_V128to64, t128) );
+ assign( *t3, unop(Iop_32Uto64, unop(Iop_64HIto32, mkexpr(hi64))) );
+ assign( *t2, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(hi64))) );
+ assign( *t1, unop(Iop_32Uto64, unop(Iop_64HIto32, mkexpr(lo64))) );
+ assign( *t0, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(lo64))) );
+}
+
+/* Signed saturating narrow 64S to 32 */
+static IRExpr* mkQNarrow64Sto32 ( IRExpr* t64 )
+{
+ IRTemp hi32 = newTemp(Ity_I32);
+ IRTemp lo32 = newTemp(Ity_I32);
+
+ vassert(typeOfIRExpr(irbb->tyenv, t64) == Ity_I64);
+
+ assign( hi32, unop(Iop_64HIto32, t64));
+ assign( lo32, unop(Iop_64to32, t64));
+
+ return IRExpr_Mux0X(
+ /* if (hi32 == (lo32 >>s 31)) */
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32, mkexpr(hi32),
+ binop( Iop_Sar32, mkexpr(lo32), mkU8(31)))),
+ /* else: sign dep saturate: 1->0x80000000, 0->0x7FFFFFFF */
+ binop(Iop_Add32, mkU32(0x7FFFFFFF),
+ binop(Iop_Shr32, mkexpr(hi32), mkU8(31))),
+ /* then: within signed-32 range: lo half good enough */
+ mkexpr(lo32) );
+}
+
+/* Unsigned saturating narrow 64U to 32 */
+static IRExpr* mkQNarrow64Uto32 ( IRExpr* t64 )
+{
+ IRTemp hi32 = newTemp(Ity_I32);
+ IRTemp lo32 = newTemp(Ity_I32);
+
+ vassert(typeOfIRExpr(irbb->tyenv, t64) == Ity_I64);
+
+ assign( hi32, unop(Iop_64HIto32, t64));
+ assign( lo32, unop(Iop_64to32, t64));
+
+ return IRExpr_Mux0X(
+ /* if (top 32 bits of t64 are 0) */
+ unop(Iop_1Uto8, binop(Iop_CmpEQ32, mkexpr(hi32), mkU32(0))),
+ /* else: positive saturate -> 0xFFFFFFFF */
+ mkU32(0xFFFFFFFF),
+ /* then: within unsigned-32 range: lo half good enough */
+ mkexpr(lo32) );
+}
+
+/* Signed saturate narrow 64->32, combining to V128 */
+static IRExpr* mkV128from4x64S ( IRExpr* t3, IRExpr* t2,
+ IRExpr* t1, IRExpr* t0 )
+{
+ vassert(typeOfIRExpr(irbb->tyenv, t3) == Ity_I64);
+ vassert(typeOfIRExpr(irbb->tyenv, t2) == Ity_I64);
+ vassert(typeOfIRExpr(irbb->tyenv, t1) == Ity_I64);
+ vassert(typeOfIRExpr(irbb->tyenv, t0) == Ity_I64);
+ return binop(Iop_64HLtoV128,
+ binop(Iop_32HLto64,
+ mkQNarrow64Sto32( t3 ),
+ mkQNarrow64Sto32( t2 )),
+ binop(Iop_32HLto64,
+ mkQNarrow64Sto32( t1 ),
+ mkQNarrow64Sto32( t0 )));
+}
+
+/* Unsigned saturate narrow 64->32, combining to V128 */
+static IRExpr* mkV128from4x64U ( IRExpr* t3, IRExpr* t2,
+ IRExpr* t1, IRExpr* t0 )
+{
+ vassert(typeOfIRExpr(irbb->tyenv, t3) == Ity_I64);
+ vassert(typeOfIRExpr(irbb->tyenv, t2) == Ity_I64);
+ vassert(typeOfIRExpr(irbb->tyenv, t1) == Ity_I64);
+ vassert(typeOfIRExpr(irbb->tyenv, t0) == Ity_I64);
+ return binop(Iop_64HLtoV128,
+ binop(Iop_32HLto64,
+ mkQNarrow64Uto32( t3 ),
+ mkQNarrow64Uto32( t2 )),
+ binop(Iop_32HLto64,
+ mkQNarrow64Uto32( t1 ),
+ mkQNarrow64Uto32( t0 )));
+}
+
+
static Int integerGuestRegOffset ( UInt archreg )
{
vassert(archreg < 32);
@@ -5155,6 +5360,20 @@
 
 IRTemp vA = newTemp(Ity_V128);
 IRTemp vB = newTemp(Ity_V128);
+ IRTemp z3 = newTemp(Ity_I64);
+ IRTemp z2 = newTemp(Ity_I64);
+ IRTemp z1 = newTemp(Ity_I64);
+ IRTemp z0 = newTemp(Ity_I64);
+ IRTemp aEvn, aOdd;
+ IRTemp a15, a14, a13, a12, a11, a10, a9, a8;
+ IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
+ IRTemp b3, b2, b1, b0;
+
+ aEvn = aOdd = IRTemp_INVALID;
+ a15 = a14 = a13 = a12 = a11 = a10 = a9 = a8 = IRTemp_INVALID;
+ a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
+ b3 = b2 = b1 = b0 = IRTemp_INVALID;
+
 assign( vA, getVReg(vA_addr));
 assign( vB, getVReg(vB_addr));
 
@@ -5436,31 +5655,148 @@
 
 
 /* Sum Across Partial */
- case 0x608: // vsum4ubs (Sum Partial (1/4) UB Saturate, AV p275)
+ case 0x608: { // vsum4ubs (Sum Partial (1/4) UB Saturate, AV p275)
+ IRTemp aEE, aEO, aOE, aOO;
+ aEE = aEO = aOE = aOO = IRTemp_INVALID;
 DIP("vsum4ubs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
 
- case 0x708: // vsum4sbs (Sum Partial (1/4) SB Saturate, AV p273)
+ /* vA: V128_8Ux16 -> 4 x V128_32Ux4, zero-extended */
+ expand8Ux16( mkexpr(vA), &aEvn, &aOdd ); // (15,13...),(14,12...)
+ expand16Ux8( mkexpr(aEvn), &aEE, &aEO ); // (15,11...),(13, 9...)
+ expand16Ux8( mkexpr(aOdd), &aOE, &aOO ); // (14,10...),(12, 8...)
+
+ /* break V128 to 4xI32's, zero-extending to I64's */
+ breakV128to4x64U( mkexpr(aEE), &a15, &a11, &a7, &a3 );
+ breakV128to4x64U( mkexpr(aOE), &a14, &a10, &a6, &a2 );
+ breakV128to4x64U( mkexpr(aEO), &a13, &a9, &a5, &a1 );
+ breakV128to4x64U( mkexpr(aOO), &a12, &a8, &a4, &a0 );
+ breakV128to4x64U( mkexpr(vB), &b3, &b2, &b1, &b0 );
+
+ /* add lanes */
+ assign( z3, binop(Iop_Add64, mkexpr(b3),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a15), mkexpr(a14)),
+ binop(Iop_Add64, mkexpr(a13), mkexpr(a12)))) );
+ assign( z2, binop(Iop_Add64, mkexpr(b2),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a11), mkexpr(a10)),
+ binop(Iop_Add64, mkexpr(a9), mkexpr(a8)))) );
+ assign( z1, binop(Iop_Add64, mkexpr(b1),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a7), mkexpr(a6)),
+ binop(Iop_Add64, mkexpr(a5), mkexpr(a4)))) );
+ assign( z0, binop(Iop_Add64, mkexpr(b0),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a3), mkexpr(a2)),
+ binop(Iop_Add64, mkexpr(a1), mkexpr(a0)))) );
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64U( mkexpr(z3), mkexpr(z2),
+ mkexpr(z1), mkexpr(z0)) );
+ break;
+ }
+ case 0x708: { // vsum4sbs (Sum Partial (1/4) SB Saturate, AV p273)
+ IRTemp aEE, aEO, aOE, aOO;
+ aEE = aEO = aOE = aOO = IRTemp_INVALID;
 DIP("vsum4sbs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
 
- case 0x648: // vsum4shs (Sum Partial (1/4) SHW Saturate, AV p274)
+ /* vA: V128_8Sx16 -> 4 x V128_32Sx4, sign-extended */
+ expand8Sx16( mkexpr(vA), &aEvn, &aOdd ); // (15,13...),(14,12...)
+ expand16Sx8( mkexpr(aEvn), &aEE, &aEO ); // (15,11...),(13, 9...)
+ expand16Sx8( mkexpr(aOdd), &aOE, &aOO ); // (14,10...),(12, 8...)
+
+ /* break V128 to 4xI32's, sign-extending to I64's */
+ breakV128to4x64S( mkexpr(aEE), &a15, &a11, &a7, &a3 );
+ breakV128to4x64S( mkexpr(aOE), &a14, &a10, &a6, &a2 );
+ breakV128to4x64S( mkexpr(aEO), &a13, &a9, &a5, &a1 );
+ breakV128to4x64S( mkexpr(aOO), &a12, &a8, &a4, &a0 );
+ breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 );
+
+ /* add lanes */
+ assign( z3, binop(Iop_Add64, mkexpr(b3),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a15), mkexpr(a14)),
+ binop(Iop_Add64, mkexpr(a13), mkexpr(a12)))) );
+ assign( z2, binop(Iop_Add64, mkexpr(b2),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a11), mkexpr(a10)),
+ binop(Iop_Add64, mkexpr(a9), mkexpr(a8)))) );
+ assign( z1, binop(Iop_Add64, mkexpr(b1),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a7), mkexpr(a6)),
+ binop(Iop_Add64, mkexpr(a5), mkexpr(a4)))) );
+ assign( z0, binop(Iop_Add64, mkexpr(b0),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a3), mkexpr(a2)),
+ binop(Iop_Add64, mkexpr(a1), mkexpr(a0)))) );
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64S( mkexpr(z3), mkexpr(z2),
+ mkexpr(z1), mkexpr(z0)) );
+ break;
+ }
+ case 0x648: { // vsum4shs (Sum Partial (1/4) SHW Saturate, AV p274)
DIP("vsum4shs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
=20
- case 0x688: // vsum2sws (Sum Partial (1/2) SW Saturate, AV p272)
+ /* vA: V128_16Sx8 -> 2 x V128_32Sx4, sign-extended */
+ expand16Sx8( mkexpr(vA), &aEvn, &aOdd ); // (7,5...),(6,4...)
+
+ /* break V128 to 4xI32's, sign-extending to I64's */
+ breakV128to4x64S( mkexpr(aEvn), &a7, &a5, &a3, &a1 );
+ breakV128to4x64S( mkexpr(aOdd), &a6, &a4, &a2, &a0 );
+ breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 );
+
+ /* add lanes */
+ assign( z3, binop(Iop_Add64, mkexpr(b3),
+ binop(Iop_Add64, mkexpr(a7), mkexpr(a6))));
+ assign( z2, binop(Iop_Add64, mkexpr(b2),
+ binop(Iop_Add64, mkexpr(a5), mkexpr(a4))));
+ assign( z1, binop(Iop_Add64, mkexpr(b1),
+ binop(Iop_Add64, mkexpr(a3), mkexpr(a2))));
+ assign( z0, binop(Iop_Add64, mkexpr(b0),
+ binop(Iop_Add64, mkexpr(a1), mkexpr(a0))));
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64S( mkexpr(z3), mkexpr(z2),
+ mkexpr(z1), mkexpr(z0)) );
+ break;
+ }
+ case 0x688: { // vsum2sws (Sum Partial (1/2) SW Saturate, AV p272)
DIP("vsum2sws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
=20
- case 0x788: // vsumsws (Sum SW Saturate, AV p271)
+ /* break V128 to 4xI32's, sign-extending to I64's */
+ breakV128to4x64S( mkexpr(vA), &a3, &a2, &a1, &a0 );
+ breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 );
+
+ /* add lanes */
+ assign( z2, binop(Iop_Add64, mkexpr(b2),
+ binop(Iop_Add64, mkexpr(a3), mkexpr(a2))) );
+ assign( z0, binop(Iop_Add64, mkexpr(b0),
+ binop(Iop_Add64, mkexpr(a1), mkexpr(a0))) );
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64S( mkU64(0), mkexpr(z2),
+ mkU64(0), mkexpr(z0)) );
+ break;
+ }
+ case 0x788: { // vsumsws (Sum SW Saturate, AV p271)
DIP("vsumsws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
=20
+ /* break V128 to 4xI32's, sign-extending to I64's */
+ breakV128to4x64S( mkexpr(vA), &a3, &a2, &a1, &a0 );
+ breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 );
+
+ /* add lanes */
+ assign( z0, binop(Iop_Add64, mkexpr(b0),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a3), mkexpr(a2)),
+ binop(Iop_Add64, mkexpr(a1), mkexpr(a0)))) );
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64S( mkU64(0), mkU64(0),
+ mkU64(0), mkexpr(z0)) );
+ break;
+ }
default:
vex_printf("dis_av_arith(PPC32)(opc2=3D0x%x)\n", opc2);
return False;
@@ -5618,32 +5954,43 @@
 UChar vC_addr = toUChar((theInstr >> 6) & 0x1F); /* theInstr[6:10] */
 UChar opc2 = toUChar((theInstr >> 0) & 0x3F); /* theInstr[0:5] */
 
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ IRTemp vC = newTemp(Ity_V128);
 IRTemp zeros = newTemp(Ity_V128);
- IRTemp vA = newTemp(Ity_V128);
- IRTemp vB = newTemp(Ity_V128);
- IRTemp vC = newTemp(Ity_V128);
+ IRTemp aLo = newTemp(Ity_V128);
+ IRTemp bLo = newTemp(Ity_V128);
+ IRTemp cLo = newTemp(Ity_V128);
+ IRTemp zLo = newTemp(Ity_V128);
+ IRTemp aHi = newTemp(Ity_V128);
+ IRTemp bHi = newTemp(Ity_V128);
+ IRTemp cHi = newTemp(Ity_V128);
+ IRTemp zHi = newTemp(Ity_V128);
+ IRTemp abEvn = newTemp(Ity_V128);
+ IRTemp abOdd = newTemp(Ity_V128);
+ IRTemp z3 = newTemp(Ity_I64);
+ IRTemp z2 = newTemp(Ity_I64);
+ IRTemp z1 = newTemp(Ity_I64);
+ IRTemp z0 = newTemp(Ity_I64);
+ IRTemp ab7, ab6, ab5, ab4, ab3, ab2, ab1, ab0;
+ IRTemp c3, c2, c1, c0;
+
+ ab7 = ab6 = ab5 = ab4 = ab3 = ab2 = ab1 = ab0 = IRTemp_INVALID;
+ c3 = c2 = c1 = c0 = IRTemp_INVALID;
+
 assign( vA, getVReg(vA_addr));
 assign( vB, getVReg(vB_addr));
 assign( vC, getVReg(vC_addr));
+ assign( zeros, unop(Iop_Dup32x4, mkU32(0)) );
 
 if (opc1 != 0x4) {
 vex_printf("dis_av_multarith(PPC32)(instr)\n");
 return False;
 }
 
- assign( zeros, unop(Iop_Dup32x4, mkU32(0)) );
-
switch (opc2) {
/* Multiply-Add */
 case 0x20: { // vmhaddshs (Multiply High, Add Signed HW Saturate, AV p185)
- IRTemp aLo = newTemp(Ity_V128);
- IRTemp bLo = newTemp(Ity_V128);
- IRTemp cLo = newTemp(Ity_V128);
- IRTemp zLo = newTemp(Ity_V128);
- IRTemp aHi = newTemp(Ity_V128);
- IRTemp bHi = newTemp(Ity_V128);
- IRTemp cHi = newTemp(Ity_V128);
- IRTemp zHi = newTemp(Ity_V128);
 IRTemp cSigns = newTemp(Ity_V128);
 DIP("vmhaddshs v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
 assign( cSigns, binop(Iop_CmpGT16Sx8, mkexpr(zeros), mkexpr(vC)) );
@@ -5675,14 +6022,6 @@
}
 case 0x21: { // vmhraddshs (Multiply High Round, Add Signed HW Saturate, AV p186)
 IRTemp zKonst = newTemp(Ity_V128);
- IRTemp aLo = newTemp(Ity_V128);
- IRTemp bLo = newTemp(Ity_V128);
- IRTemp cLo = newTemp(Ity_V128);
- IRTemp zLo = newTemp(Ity_V128);
- IRTemp aHi = newTemp(Ity_V128);
- IRTemp bHi = newTemp(Ity_V128);
- IRTemp cHi = newTemp(Ity_V128);
- IRTemp zHi = newTemp(Ity_V128);
 IRTemp cSigns = newTemp(Ity_V128);
 DIP("vmhraddshs v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
 assign( cSigns, binop(Iop_CmpGT16Sx8, mkexpr(zeros), mkexpr(vC)) );
@@ -5694,8 +6033,8 @@
 assign( cHi, binop(Iop_InterleaveHI16x8, mkexpr(cSigns), mkexpr(vC)) );
 
 /* shifting our const avoids store/load version of Dup */
- assign( zKonst, binop(Iop_ShlN32x4, unop(Iop_Dup32x4,
- mkU32(0x1)), mkU8(14)) );
+ assign( zKonst, binop(Iop_ShlN32x4, unop(Iop_Dup32x4, mkU32(0x1)),
+ mkU8(14)) );
 
assign( zLo, binop(Iop_Add32x4,
binop(Iop_SarN32x4,
@@ -5719,14 +6058,6 @@
break;
}
 case 0x22: { // vmladduhm (Multiply Low, Add Unsigned HW Modulo, AV p194)
- IRTemp aLo = newTemp(Ity_V128);
- IRTemp bLo = newTemp(Ity_V128);
- IRTemp cLo = newTemp(Ity_V128);
- IRTemp zLo = newTemp(Ity_V128);
- IRTemp aHi = newTemp(Ity_V128);
- IRTemp bHi = newTemp(Ity_V128);
- IRTemp cHi = newTemp(Ity_V128);
- IRTemp zHi = newTemp(Ity_V128);
 DIP("vmladduhm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
 assign( aLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vA)) );
 assign( bLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vB)) );
@@ -5751,78 +6082,138 @@
 
/* Multiply-Sum */
case 0x24: { // vmsumubm (Multiply Sum Unsigned B Modulo, AV p204)
- IRTemp zKonst = newTemp(Ity_V128);
- IRTemp odd = newTemp(Ity_V128);
- IRTemp even = newTemp(Ity_V128);
- IRTemp odd_odd = newTemp(Ity_V128);
- IRTemp odd_even = newTemp(Ity_V128);
- IRTemp even_odd = newTemp(Ity_V128);
- IRTemp even_even = newTemp(Ity_V128);
+ IRTemp abEE, abEO, abOE, abOO;
+ abEE = abEO = abOE = abOO = IRTemp_INVALID;
 DIP("vmsumubm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
- assign( odd, binop(Iop_MullEven8Ux16,
- binop(Iop_ShlV128, mkexpr(vA), mkU8(8)),
- binop(Iop_ShlV128, mkexpr(vB), mkU8(8)) ));
- assign( even, binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB)) );
- /* zKonst just used to separate the lanes out */
- assign( zKonst, unop(Iop_Dup16x8, mkU16(0x1)) );
 
- assign( odd_odd, binop(Iop_MullEven16Ux8,
- binop(Iop_ShlV128, mkexpr(odd), mkU8(16)),
- binop(Iop_ShlV128, mkexpr(zKonst), mkU8(16)) ));
- assign( odd_even, binop(Iop_MullEven16Ux8, mkexpr(odd), mkexpr(zKonst)) );
- assign( even_odd, binop(Iop_MullEven16Ux8,
- binop(Iop_ShlV128, mkexpr(even), mkU8(16)),
- binop(Iop_ShlV128, mkexpr(zKonst), mkU8(16)) ));
- assign( even_even, binop(Iop_MullEven16Ux8, mkexpr(even), mkexpr(zKonst)) );
-
+ /* multiply vA,vB (unsigned, widening) */
+ assign( abEvn, binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB)) );
+ assign( abOdd, binop(Iop_MullEven8Ux16,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(8)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(8)) ));
+
+ /* evn,odd: V128_16Ux8 -> 2 x V128_32Ux4, zero-extended */
+ expand16Ux8( mkexpr(abEvn), &abEE, &abEO );
+ expand16Ux8( mkexpr(abOdd), &abOE, &abOO );
+
 putVReg( vD_addr,
 binop(Iop_Add32x4, mkexpr(vC),
 binop(Iop_Add32x4,
- binop(Iop_Add32x4, mkexpr(odd_even), mkexpr(odd_odd)),
- binop(Iop_Add32x4, mkexpr(even_even), mkexpr(even_odd)))) );
+ binop(Iop_Add32x4, mkexpr(abEE), mkexpr(abEO)),
+ binop(Iop_Add32x4, mkexpr(abOE), mkexpr(abOO)))) );
break;
}
- case 0x25: // vmsummbm (Multiply Sum Mixed-Sign B Modulo, AV p201)
+ case 0x25: { // vmsummbm (Multiply Sum Mixed-Sign B Modulo, AV p201)
+ IRTemp aEvn, aOdd, bEvn, bOdd;
+ IRTemp abEE = newTemp(Ity_V128);
+ IRTemp abEO = newTemp(Ity_V128);
+ IRTemp abOE = newTemp(Ity_V128);
+ IRTemp abOO = newTemp(Ity_V128);
+ aEvn = aOdd = bEvn = bOdd = IRTemp_INVALID;
 DIP("vmsummbm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
- DIP(" => not implemented\n");
- return False;
 
+ /* sign-extend vA, zero-extend vB, for mixed-sign multiply
+ (separating out adjacent lanes to different vectors) */
+ expand8Sx16( mkexpr(vA), &aEvn, &aOdd );
+ expand8Ux16( mkexpr(vB), &bEvn, &bOdd );
+
+ /* multiply vA, vB, again separating adjacent lanes */
+ assign( abEE, binop(Iop_MullEven16Sx8, mkexpr(aEvn), mkexpr(bEvn) ));
+ assign( abEO, binop(Iop_MullEven16Sx8,
+ binop(Iop_ShlV128, mkexpr(aEvn), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(bEvn), mkU8(16)) ));
+ assign( abOE, binop(Iop_MullEven16Sx8, mkexpr(aOdd), mkexpr(bOdd) ));
+ assign( abOO, binop(Iop_MullEven16Sx8,
+ binop(Iop_ShlV128, mkexpr(aOdd), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(bOdd), mkU8(16)) ));
+
+ /* add results together, + vC */
+ putVReg( vD_addr,
+ binop(Iop_QAdd32Sx4, mkexpr(vC),
+ binop(Iop_QAdd32Sx4,
+ binop(Iop_QAdd32Sx4, mkexpr(abEE), mkexpr(abEO)),
+ binop(Iop_QAdd32Sx4, mkexpr(abOE), mkexpr(abOO)) )));
+ break;
+ }
case 0x26: { // vmsumuhm (Multiply Sum Unsigned HW Modulo, AV p205)
- IRTemp odd = newTemp(Ity_V128);
- IRTemp even = newTemp(Ity_V128);
 DIP("vmsumuhm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
- assign( odd, binop(Iop_MullEven16Ux8,
- binop(Iop_ShlV128, mkexpr(vA), mkU8(16)),
- binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) ));
- assign( even, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) );
+ assign( abEvn, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) );
+ assign( abOdd, binop(Iop_MullEven16Ux8,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) ));
putVReg( vD_addr,
binop(Iop_Add32x4, mkexpr(vC),
- binop(Iop_Add32x4, mkexpr(odd), mkexpr(even))) );
+ binop(Iop_Add32x4, mkexpr(abEvn), mkexpr(abOdd))) );
break;
}
- case 0x27: // vmsumuhs (Multiply Sum Unsigned HW Saturate, AV p206)
+ case 0x27: { // vmsumuhs (Multiply Sum Unsigned HW Saturate, AV p206)
DIP("vmsumuhs v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_ad=
dr);
- DIP(" =3D> not implemented\n");
- return False;
+ /* widening multiply, separating lanes */
+ assign( abEvn, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) );
+ assign( abOdd, binop(Iop_MullEven16Ux8,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(16))) );
=20
+ /* break V128 to 4xI32's, zero-extending to I64's */
+ breakV128to4x64U( mkexpr(abEvn), &ab7, &ab5, &ab3, &ab1 );
+ breakV128to4x64U( mkexpr(abOdd), &ab6, &ab4, &ab2, &ab0 );
+ breakV128to4x64U( mkexpr(vC), &c3, &c2, &c1, &c0 );
+
+ /* add lanes */
+ assign( z3, binop(Iop_Add64, mkexpr(c3),
+ binop(Iop_Add64, mkexpr(ab7), mkexpr(ab6))));
+ assign( z2, binop(Iop_Add64, mkexpr(c2),
+ binop(Iop_Add64, mkexpr(ab5), mkexpr(ab4))));
+ assign( z1, binop(Iop_Add64, mkexpr(c1),
+ binop(Iop_Add64, mkexpr(ab3), mkexpr(ab2))));
+ assign( z0, binop(Iop_Add64, mkexpr(c0),
+ binop(Iop_Add64, mkexpr(ab1), mkexpr(ab0))));
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64U( mkexpr(z3), mkexpr(z2),
+ mkexpr(z1), mkexpr(z0)) );
+
+ break;
+ }
case 0x28: { // vmsumshm (Multiply Sum Signed HW Modulo, AV p202)
- IRTemp odd = newTemp(Ity_V128);
- IRTemp even = newTemp(Ity_V128);
 DIP("vmsumshm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
- assign( odd, binop(Iop_MullEven16Sx8,
- binop(Iop_ShlV128, mkexpr(vA), mkU8(16)),
- binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) ));
- assign( even, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) );
+ assign( abEvn, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) );
+ assign( abOdd, binop(Iop_MullEven16Sx8,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) ));
putVReg( vD_addr,
binop(Iop_Add32x4, mkexpr(vC),
- binop(Iop_Add32x4, mkexpr(odd), mkexpr(even))) );
+ binop(Iop_Add32x4, mkexpr(abOdd), mkexpr(abEvn))) );
break;
}
- case 0x29: // vmsumshs (Multiply Sum Signed HW Saturate, AV p203)
+ case 0x29: { // vmsumshs (Multiply Sum Signed HW Saturate, AV p203)
DIP("vmsumshs v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_ad=
dr);
- DIP(" =3D> not implemented\n");
- return False;
+ /* widening multiply, separating lanes */
+ assign( abEvn, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) );
+ assign( abOdd, binop(Iop_MullEven16Sx8,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(16))) );
=20
+ /* break V128 to 4xI32's, sign-extending to I64's */
+ breakV128to4x64S( mkexpr(abEvn), &ab7, &ab5, &ab3, &ab1 );
+ breakV128to4x64S( mkexpr(abOdd), &ab6, &ab4, &ab2, &ab0 );
+ breakV128to4x64S( mkexpr(vC), &c3, &c2, &c1, &c0 );
+
+ /* add lanes */
+ assign( z3, binop(Iop_Add64, mkexpr(c3),
+ binop(Iop_Add64, mkexpr(ab7), mkexpr(ab6))));
+ assign( z2, binop(Iop_Add64, mkexpr(c2),
+ binop(Iop_Add64, mkexpr(ab5), mkexpr(ab4))));
+ assign( z1, binop(Iop_Add64, mkexpr(c1),
+ binop(Iop_Add64, mkexpr(ab3), mkexpr(ab2))));
+ assign( z0, binop(Iop_Add64, mkexpr(c0),
+ binop(Iop_Add64, mkexpr(ab1), mkexpr(ab0))));
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64S( mkexpr(z3), mkexpr(z2),
+ mkexpr(z1), mkexpr(z0)) );
+ break;
+ }
default:
vex_printf("dis_av_multarith(PPC32)(opc2)\n");
return False;
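
(For reference before the backend changes: what one 32-bit lane of e.g.
vmsumuhs computes, as an illustrative scalar model, not VEX code -- two
unsigned 16x16 products plus the vC lane, summed in 64 bits and then
unsigned-saturated exactly as mkQNarrow64Uto32 does.)

   #include <stdint.h>

   /* Illustrative per-lane model of vmsumuhs; names are made up. */
   static uint32_t vmsumuhs_lane ( uint16_t a0, uint16_t a1,
                                   uint16_t b0, uint16_t b1, uint32_t c )
   {
      uint64_t sum = (uint64_t)c
                   + (uint64_t)a0 * b0
                   + (uint64_t)a1 * b1;
      /* unsigned saturate: any nonzero hi32 => 0xFFFFFFFF */
      return (sum > 0xFFFFFFFFu) ? 0xFFFFFFFFu : (uint32_t)sum;
   }
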
Modified: trunk/priv/host-ppc32/hdefs.c
===================================================================
--- trunk/priv/host-ppc32/hdefs.c 2005-11-07 15:37:24 UTC (rev 1446)
+++ trunk/priv/host-ppc32/hdefs.c 2005-11-08 16:23:07 UTC (rev 1447)
@@ -721,6 +721,17 @@
 i->Pin.Alu32.srcR = srcR;
 return i;
 }
+PPC32Instr* PPC32Instr_AddSubC32 ( Bool isAdd, Bool setC,
+ HReg dst, HReg srcL, HReg srcR ) {
+ PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr));
+ i->tag = Pin_AddSubC32;
+ i->Pin.AddSubC32.isAdd = isAdd;
+ i->Pin.AddSubC32.setC = setC;
+ i->Pin.AddSubC32.dst = dst;
+ i->Pin.AddSubC32.srcL = srcL;
+ i->Pin.AddSubC32.srcR = srcR;
+ return i;
+}
 PPC32Instr* PPC32Instr_Cmp32 ( Bool syned, UInt crfD,
 HReg srcL, PPC32RH* srcR ) {
 PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr));
@@ -1080,6 +1091,16 @@
ppPPC32RH(i->Pin.Alu32.srcR);
}
return;
+ case Pin_AddSubC32:
+ vex_printf("%s%s ",
+ i->Pin.AddSubC32.isAdd ? "add" : "sub",
+ i->Pin.AddSubC32.setC ? "c" : "e");
+ ppHRegPPC32(i->Pin.AddSubC32.dst);
+ vex_printf(",");
+ ppHRegPPC32(i->Pin.AddSubC32.srcL);
+ vex_printf(",");
+ ppHRegPPC32(i->Pin.AddSubC32.srcR);
+ return;
case Pin_Cmp32:
vex_printf("%s%s %%cr%u,",
i->Pin.Cmp32.syned ? "cmp" : "cmpl",
@@ -1469,6 +1490,11 @@
addRegUsage_PPC32RH(u, i->Pin.Alu32.srcR);
addHRegUse(u, HRmWrite, i->Pin.Alu32.dst);
return;
+ case Pin_AddSubC32:
+ addHRegUse(u, HRmWrite, i->Pin.AddSubC32.dst);
+ addHRegUse(u, HRmRead, i->Pin.AddSubC32.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AddSubC32.srcR);
+ return;
case Pin_Cmp32:
addHRegUse(u, HRmRead, i->Pin.Cmp32.srcL);
addRegUsage_PPC32RH(u, i->Pin.Cmp32.srcR);
@@ -1694,6 +1720,11 @@
mapReg(m, &i->Pin.Alu32.srcL);
mapRegs_PPC32RH(m, i->Pin.Alu32.srcR);
return;
+ case Pin_AddSubC32:
+ mapReg(m, &i->Pin.AddSubC32.dst);
+ mapReg(m, &i->Pin.AddSubC32.srcL);
+ mapReg(m, &i->Pin.AddSubC32.srcR);
+ return;
case Pin_Cmp32:
mapReg(m, &i->Pin.Cmp32.srcL);
mapRegs_PPC32RH(m, i->Pin.Cmp32.srcR);
@@ -2346,6 +2377,28 @@
goto done;
}
 
+ case Pin_AddSubC32: {
+ Bool isAdd = i->Pin.AddSubC32.isAdd;
+ Bool setC = i->Pin.AddSubC32.setC;
+ UInt r_srcL = iregNo(i->Pin.AddSubC32.srcL);
+ UInt r_srcR = iregNo(i->Pin.AddSubC32.srcR);
+ UInt r_dst = iregNo(i->Pin.AddSubC32.dst);
+
+ if (isAdd) {
+ if (setC) /* addc (PPC32 p348) */
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 10, 0);
+ else /* adde (PPC32 p349) */
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 138, 0);
+ } else {
+ /* subfX, with args the "wrong" way round */
+ if (setC) /* subfc (PPC32 p538) */
+ p = mkFormXO(p, 31, r_dst, r_srcR, r_srcL, 0, 8, 0);
+ else /* subfe (PPC32 p539) */
+ p = mkFormXO(p, 31, r_dst, r_srcR, r_srcL, 0, 136, 0);
+ }
+ goto done;
+ }
+
case Pin_Cmp32: {
 Bool syned = i->Pin.Cmp32.syned;
 UInt fld1 = i->Pin.Cmp32.crfD << 2;
Modified: trunk/priv/host-ppc32/hdefs.h
===================================================================
--- trunk/priv/host-ppc32/hdefs.h 2005-11-07 15:37:24 UTC (rev 1446)
+++ trunk/priv/host-ppc32/hdefs.h 2005-11-08 16:23:07 UTC (rev 1447)
@@ -419,6 +419,7 @@
enum {
Pin_LI32, /* load 32-bit immediate (fake insn) */
Pin_Alu32, /* 32-bit add/sub/and/or/xor/shl/shr/sar */
+ Pin_AddSubC32, /* 32-bit add/sub with read/write carry */
Pin_Cmp32, /* 32-bit compare */
Pin_Unary32, /* 32-bit not, neg, clz */
Pin_MulL, /* widening multiply */
@@ -491,6 +492,14 @@
HReg srcL;
PPC32RH* srcR;
} Alu32;
+ /* */
+ struct {
+ Bool isAdd; /* else sub */
+ Bool setC; /* else read carry */
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } AddSubC32;
/* If signed, the immediate, if it exists, is a signed 16,
else it is an unsigned 16. */
struct {
@@ -710,6 +719,7 @@
 
 extern PPC32Instr* PPC32Instr_LI32 ( HReg, UInt );
 extern PPC32Instr* PPC32Instr_Alu32 ( PPC32AluOp, HReg, HReg, PPC32RH* );
+extern PPC32Instr* PPC32Instr_AddSubC32 ( Bool, Bool, HReg, HReg, HReg );
 extern PPC32Instr* PPC32Instr_Cmp32 ( Bool, UInt, HReg, PPC32RH* );
 extern PPC32Instr* PPC32Instr_Unary32 ( PPC32UnaryOp op, HReg dst, HReg src );
 extern PPC32Instr* PPC32Instr_MulL ( Bool syned, Bool hi32, HReg, HReg, HReg );
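
(Also for reference: an illustrative scalar model, not VEX code, of the
Iop_32Sto64 lowering added to isel.c below -- the low word is the source
itself, the high word is just its sign, via an arithmetic shift by 31.)

   #include <stdint.h>

   static void widen32Sto64 ( int32_t src, uint32_t* rHi, uint32_t* rLo )
   {
      *rLo = (uint32_t)src;
      *rHi = (uint32_t)(src >> 31);   /* sign word: 0 or 0xFFFFFFFF */
   }
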
Modified: trunk/priv/host-ppc32/isel.c
===================================================================
--- trunk/priv/host-ppc32/isel.c 2005-11-07 15:37:24 UTC (rev 1446)
+++ trunk/priv/host-ppc32/isel.c 2005-11-08 16:23:07 UTC (rev 1447)
@@ -1926,20 +1926,20 @@
vassert(e);
 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
 
-//.. /* 64-bit literal */
-//.. if (e->tag == Iex_Const) {
-//.. ULong w64 = e->Iex.Const.con->Ico.U64;
-//.. UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
-//.. UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
-//.. HReg tLo = newVRegI(env);
-//.. HReg tHi = newVRegI(env);
-//.. vassert(e->Iex.Const.con->tag == Ico_U64);
-//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
-//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
-//.. *rHi = tHi;
-//.. *rLo = tLo;
-//.. return;
-//.. }
+ /* 64-bit literal */
+ if (e->tag == Iex_Const) {
+ ULong w64 = e->Iex.Const.con->Ico.U64;
+ UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
+ UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ vassert(e->Iex.Const.con->tag == Ico_U64);
+ addInstr(env, PPC32Instr_LI32(tHi, wHi));
+ addInstr(env, PPC32Instr_LI32(tLo, wLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
 
/* read 64-bit IRTemp */
 if (e->tag == Iex_Tmp) {
@@ -2070,39 +2070,34 @@
 : e->Iex.Binop.op==Iop_And64 ? Palu_AND
 : Palu_XOR;
 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
- addInstr(env, mk_iMOVds_RR(tHi, xHi));
- addInstr(env, mk_iMOVds_RR(tLo, xLo));
 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
- addInstr(env, PPC32Instr_Alu32(op, tHi, tHi, PPC32RH_Reg(yHi)));
- addInstr(env, PPC32Instr_Alu32(op, tLo, tHi, PPC32RH_Reg(yLo)));
+ addInstr(env, PPC32Instr_Alu32(op, tHi, xHi, PPC32RH_Reg(yHi)));
+ addInstr(env, PPC32Instr_Alu32(op, tLo, xLo, PPC32RH_Reg(yLo)));
 *rHi = tHi;
 *rLo = tLo;
 return;
 }
 
-//.. /* Add64/Sub64 */
-//.. case Iop_Add64:
+ /* Add64/Sub64 */
+ case Iop_Add64: {
//.. case Iop_Sub64: {
-//.. HReg xLo, xHi, yLo, yHi;
-//.. HReg tLo = newVRegI(env);
-//.. HReg tHi = newVRegI(env);
-//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
-//.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
-//.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
-//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
+ HReg xLo, xHi, yLo, yHi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
//.. if (e->Iex.Binop.op==Iop_Add64) {
-//.. addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
-//.. addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
-//.. } else {
-//.. addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
-//.. addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
+ addInstr(env, PPC32Instr_AddSubC32( True/*add*/, True /*set carry*/,
+ tLo, xLo, yLo));
+ addInstr(env, PPC32Instr_AddSubC32( True/*add*/, False/*read carry*/,
+ tHi, xHi, yHi));
+//.. } else { // Sub64
//.. }
-//.. *rHi = tHi;
-//.. *rLo = tLo;
-//.. return;
-//.. }
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
 
-
/* 32HLto64(e1,e2) */
case Iop_32HLto64:
 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
@@ -2401,18 +2396,15 @@
 if (e->tag == Iex_Unop) {
 switch (e->Iex.Unop.op) {
 
-//.. /* 32Sto64(e) */
-//.. case Iop_32Sto64: {
-//.. HReg tLo = newVRegI(env);
-//.. HReg tHi = newVRegI(env);
-//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
-//.. addInstr(env, mk_iMOVsd_RR(src,tHi));
-//.. addInstr(env, mk_iMOVsd_RR(src,tLo));
-//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tHi)));
-//.. *rHi = tHi;
-//.. *rLo = tLo;
-//.. return;
-//.. }
+ /* 32Sto64(e) */
+ case Iop_32Sto64: {
+ HReg tHi = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, PPC32Instr_Alu32(Palu_SAR, tHi, src, PPC32RH_Imm(False,31)));
+ *rHi = tHi;
+ *rLo = src;
+ return;
+ }
 
/* 32Uto64(e) */
case Iop_32Uto64: {