|
From: <sv...@va...> - 2005-09-15 12:42:34
|
Author: cerion
Date: 2005-09-15 13:42:16 +0100 (Thu, 15 Sep 2005)
New Revision: 1395
Log:
Implemented simple AltiVec arithmetic insns:
- add, sub, max, min, avg, hi/lo mul
and all varieties thereof: (un)signed, (un)saturated, 8|16|32 lane sizes...
fixed backend hi/lo_mul: only valid for 16|32 bit lanes, not 8.
Modified:
trunk/priv/guest-ppc32/toIR.c
trunk/priv/host-ppc32/hdefs.c
trunk/priv/host-ppc32/isel.c
Modified: trunk/priv/guest-ppc32/toIR.c
===================================================================
--- trunk/priv/guest-ppc32/toIR.c 2005-09-14 22:59:26 UTC (rev 1394)
+++ trunk/priv/guest-ppc32/toIR.c 2005-09-15 12:42:16 UTC (rev 1395)
@@ -5068,247 +5068,264 @@
/* Add */
case 0x180: { // vaddcuw (Add Carryout Unsigned Word, AV p136)
DIP("vaddcuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- /* ov =3D x >u (x+y) */
- IRTemp sum =3D newTemp(Ity_V128);
- assign( sum, binop(Iop_Add32x4, mkexpr(vA), mkexpr(vB)) );
+ /* unsigned_ov(x+y) =3D (y >u not(x)) */
putVReg( vD_addr, binop(Iop_ShrN32x4,
- binop(Iop_CmpGT32Ux4, mkexpr(vA), mkexpr(s=
um)),
+ binop(Iop_CmpGT32Ux4, mkexpr(vB),
+ unop(Iop_NotV128, mkexpr(vA))),
mkU8(31)) );
break;
}
case 0x000: // vaddubm (Add Unsigned Byte Modulo, AV p141)
DIP("vaddubm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_Add8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
case 0x040: // vadduhm (Add Unsigned Half Word Modulo, AV p143)
DIP("vadduhm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_Add16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
case 0x080: // vadduwm (Add Unsigned Word Modulo, AV p145)
DIP("vadduwm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_Add32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
case 0x200: // vaddubs (Add Unsigned Byte Saturate, AV p142)
DIP("vaddubs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_QAdd8Ux16, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT], perhaps via new primop: Iop_SatOfQAdd8Ux16
+ break;
+
case 0x240: // vadduhs (Add Unsigned Half Word Saturate, AV p144)
DIP("vadduhs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_QAdd16Ux8, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
case 0x280: // vadduws (Add Unsigned Word Saturate, AV p146)
DIP("vadduws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_QAdd32Ux4, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
case 0x300: // vaddsbs (Add Signed Byte Saturate, AV p138)
DIP("vaddsbs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_QAdd8Sx16, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
case 0x340: // vaddshs (Add Signed Half Word Saturate, AV p139)
DIP("vaddshs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_QAdd16Sx8, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
case 0x380: // vaddsws (Add Signed Word Saturate, AV p140)
DIP("vaddsws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_QAdd32Sx4, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+
/* Subtract */
- case 0x580: // vsubcuw (Subtract Carryout Unsigned Word, AV p260)
+ case 0x580: { // vsubcuw (Subtract Carryout Unsigned Word, AV p260)
DIP("vsubcuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ /* unsigned_ov(x-y) =3D (y >u x) */
+ putVReg( vD_addr, binop(Iop_ShrN32x4,
+ unop(Iop_NotV128,
+ binop(Iop_CmpGT32Ux4, mkexpr(vB),
+ mkexpr(vA))),
+ mkU8(31)) );
+ break;
+ } =20
case 0x400: // vsububm (Subtract Unsigned Byte Modulo, AV p265)
DIP("vsububm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_Sub8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
case 0x440: // vsubuhm (Subtract Unsigned Half Word Modulo, AV p267)
DIP("vsubuhm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_Sub16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
case 0x480: // vsubuwm (Subtract Unsigned Word Modulo, AV p269)
DIP("vsubuwm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_Sub32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
case 0x600: // vsububs (Subtract Unsigned Byte Saturate, AV p266)
DIP("vsububs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_QSub8Ux16, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
case 0x640: // vsubuhs (Subtract Unsigned Half Word Saturate, AV p268=
)
DIP("vsubuhs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_QSub16Ux8, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
case 0x680: // vsubuws (Subtract Unsigned Word Saturate, AV p270)
DIP("vsubuws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_QSub32Ux4, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
case 0x700: // vsubsbs (Subtract Signed Byte Saturate, AV p262)
DIP("vsubsbs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_QSub8Sx16, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
case 0x740: // vsubshs (Subtract Signed Half Word Saturate, AV p263)
DIP("vsubshs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_QSub16Sx8, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
case 0x780: // vsubsws (Subtract Signed Word Saturate, AV p264)
DIP("vsubsws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_QSub32Sx4, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
=20
=20
/* Maximum */
case 0x002: // vmaxub (Maximum Unsigned Byte, AV p182)
DIP("vmaxub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Max8Ux16, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x042: // vmaxuh (Maximum Unsigned Half Word, AV p183)
DIP("vmaxuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Max16Ux8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x082: // vmaxuw (Maximum Unsigned Word, AV p184)
DIP("vmaxuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Max32Ux4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x102: // vmaxsb (Maximum Signed Byte, AV p179)
DIP("vmaxsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Max8Sx16, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x142: // vmaxsh (Maximum Signed Half Word, AV p180)
DIP("vmaxsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Max16Sx8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x182: // vmaxsw (Maximum Signed Word, AV p181)
DIP("vmaxsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Max32Sx4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
=20
/* Minimum */
case 0x202: // vminub (Minimum Unsigned Byte, AV p191)
DIP("vminub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Min8Ux16, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x242: // vminuh (Minimum Unsigned Half Word, AV p192)
DIP("vminuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Min16Ux8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x282: // vminuw (Minimum Unsigned Word, AV p193)
DIP("vminuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Min32Ux4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x302: // vminsb (Minimum Signed Byte, AV p188)
DIP("vminsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Min8Sx16, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x342: // vminsh (Minimum Signed Half Word, AV p189)
DIP("vminsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Min16Sx8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x382: // vminsw (Minimum Signed Word, AV p190)
DIP("vminsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
- =20
+ putVReg( vD_addr, binop(Iop_Min32Sx4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
+
/* Average */
case 0x402: // vavgub (Average Unsigned Byte, AV p152)
DIP("vavgub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Avg8Ux16, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x442: // vavguh (Average Unsigned Half Word, AV p153)
DIP("vavguh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Avg16Ux8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x482: // vavguw (Average Unsigned Word, AV p154)
DIP("vavguw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Avg32Ux4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x502: // vavgsb (Average Signed Byte, AV p149)
DIP("vavgsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Avg8Sx16, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x542: // vavgsh (Average Signed Half Word, AV p150)
DIP("vavgsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Avg16Sx8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x582: // vavgsw (Average Signed Word, AV p151)
DIP("vavgsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Avg32Sx4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
=20
/* Multiply */
case 0x008: // vmuloub (Multiply Odd Unsigned Byte, AV p213)
DIP("vmuloub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_MulLo16Ux8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x048: // vmulouh (Multiply Odd Unsigned Half Word, AV p214)
DIP("vmulouh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_MulLo32Ux4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x108: // vmulosb (Multiply Odd Signed Byte, AV p211)
DIP("vmulosb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_MulLo16Sx8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x148: // vmulosh (Multiply Odd Signed Half Word, AV p212)
DIP("vmulosh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_MulLo32Sx4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x208: // vmuleub (Multiply Even Unsigned Byte, AV p209)
DIP("vmuleub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_MulHi16Ux8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x248: // vmuleuh (Multiply Even Unsigned Half Word, AV p210)
DIP("vmuleuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_MulHi32Ux4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x308: // vmulesb (Multiply Even Signed Byte, AV p207)
DIP("vmulesb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_MulHi16Sx8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x348: // vmulesh (Multiply Even Signed Half Word, AV p208)
DIP("vmulesh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_MulHi32Sx4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
=20
/* Sum Across Partial */
Modified: trunk/priv/host-ppc32/hdefs.c
===================================================================
--- trunk/priv/host-ppc32/hdefs.c 2005-09-14 22:59:26 UTC (rev 1394)
+++ trunk/priv/host-ppc32/hdefs.c 2005-09-15 12:42:16 UTC (rev 1395)
@@ -2902,11 +2902,6 @@
case Pav_SUBUS: opc2 =3D 1536; break; // vsububs
case Pav_SUBSS: opc2 =3D 1792; break; // vsubsbs
=20
- case Pav_OMULU: opc2 =3D 8; break; // vmuloub
- case Pav_OMULS: opc2 =3D 264; break; // vmulosb
- case Pav_EMULU: opc2 =3D 520; break; // vmuleub
- case Pav_EMULS: opc2 =3D 776; break; // vmulesb
-
case Pav_AVGU: opc2 =3D 1026; break; // vavgub
case Pav_AVGS: opc2 =3D 1282; break; // vavgsb
case Pav_MAXU: opc2 =3D 2; break; // vmaxub
@@ -2948,10 +2943,10 @@
case Pav_SUBUS: opc2 =3D 1600; break; // vsubuhs
case Pav_SUBSS: opc2 =3D 1856; break; // vsubshs
=20
- case Pav_OMULU: opc2 =3D 72; break; // vmulouh
- case Pav_OMULS: opc2 =3D 328; break; // vmulosh
- case Pav_EMULU: opc2 =3D 584; break; // vmuleuh
- case Pav_EMULS: opc2 =3D 840; break; // vmulesh
+ case Pav_OMULU: opc2 =3D 8; break; // vmuloub
+ case Pav_OMULS: opc2 =3D 264; break; // vmulosb
+ case Pav_EMULU: opc2 =3D 520; break; // vmuleub
+ case Pav_EMULS: opc2 =3D 776; break; // vmulesb
=20
case Pav_AVGU: opc2 =3D 1090; break; // vavguh
case Pav_AVGS: opc2 =3D 1346; break; // vavgsh
@@ -3000,6 +2995,11 @@
case Pav_SUBUS: opc2 =3D 1664; break; // vsubuws
case Pav_SUBSS: opc2 =3D 1920; break; // vsubsws
=20
+ case Pav_OMULU: opc2 =3D 72; break; // vmulouh
+ case Pav_OMULS: opc2 =3D 328; break; // vmulosh
+ case Pav_EMULU: opc2 =3D 584; break; // vmuleuh
+ case Pav_EMULS: opc2 =3D 840; break; // vmulesh
+
case Pav_AVGU: opc2 =3D 1154; break; // vavguw
case Pav_AVGS: opc2 =3D 1410; break; // vavgsw
=20
@@ -3069,17 +3069,17 @@
=20
// Finally, do the multiply:
p =3D mkFormVA( p, 4, v_dst, v_srcL, vB, v_srcR, 46 );
- break;
+ break;
}
case Pav_CMPEQF:
p =3D mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 198 ); // vcmp=
eqfp
- break;
+ break;
case Pav_CMPGTF:
p =3D mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 1, 710 ); // vcmp=
gtfp
- break;
+ break;
case Pav_CMPGEF:
p =3D mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 1, 454 ); // vcmp=
gefp
- break;
+ break;
=20
default:
goto bad;
Modified: trunk/priv/host-ppc32/isel.c
===================================================================
--- trunk/priv/host-ppc32/isel.c 2005-09-14 22:59:26 UTC (rev 1394)
+++ trunk/priv/host-ppc32/isel.c 2005-09-15 12:42:16 UTC (rev 1395)
@@ -3217,7 +3217,7 @@
//.. addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
//.. return dst;
//.. }
-//..=20
+
//.. case Iop_CmpEQ64Fx2: op =3D Xsse_CMPEQF; goto do_64Fx2;
//.. case Iop_CmpLT64Fx2: op =3D Xsse_CMPLTF; goto do_64Fx2;
//.. case Iop_CmpLE64Fx2: op =3D Xsse_CMPLEF; goto do_64Fx2;
@@ -3237,7 +3237,7 @@
//.. addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
//.. return dst;
//.. }
-//..=20
+
//.. case Iop_CmpEQ32F0x4: op =3D Xsse_CMPEQF; goto do_32F0x4;
//.. case Iop_CmpLT32F0x4: op =3D Xsse_CMPLTF; goto do_32F0x4;
//.. case Iop_CmpLE32F0x4: op =3D Xsse_CMPLEF; goto do_32F0x4;
@@ -3255,7 +3255,7 @@
//.. addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
//.. return dst;
//.. }
-//..=20
+
//.. case Iop_CmpEQ64F0x2: op =3D Xsse_CMPEQF; goto do_64F0x2;
//.. case Iop_CmpLT64F0x2: op =3D Xsse_CMPLTF; goto do_64F0x2;
//.. case Iop_CmpLE64F0x2: op =3D Xsse_CMPLEF; goto do_64F0x2;
@@ -3274,14 +3274,14 @@
//.. addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
//.. return dst;
//.. }
-//..=20
+
//.. case Iop_QNarrow32Sx4:=20
//.. op =3D Xsse_PACKSSD; arg1isEReg =3D True; goto do_SseReRg;
//.. case Iop_QNarrow16Sx8:=20
//.. op =3D Xsse_PACKSSW; arg1isEReg =3D True; goto do_SseReRg;
//.. case Iop_QNarrow16Ux8:=20
//.. op =3D Xsse_PACKUSW; arg1isEReg =3D True; goto do_SseReRg;
-//..=20
+
//.. case Iop_InterleaveHI8x16:=20
//.. op =3D Xsse_UNPCKHB; arg1isEReg =3D True; goto do_SseReRg;
//.. case Iop_InterleaveHI16x8:=20
@@ -3290,7 +3290,7 @@
//.. op =3D Xsse_UNPCKHD; arg1isEReg =3D True; goto do_SseReRg;
//.. case Iop_InterleaveHI64x2:=20
//.. op =3D Xsse_UNPCKHQ; arg1isEReg =3D True; goto do_SseReRg;
-//..=20
+
//.. case Iop_InterleaveLO8x16:=20
//.. op =3D Xsse_UNPCKLB; arg1isEReg =3D True; goto do_SseReRg;
//.. case Iop_InterleaveLO16x8:=20
@@ -3303,9 +3303,15 @@
case Iop_AndV128: op =3D Pav_AND; goto do_AvBin;
case Iop_OrV128: op =3D Pav_OR; goto do_AvBin;
case Iop_XorV128: op =3D Pav_XOR; goto do_AvBin;
+ do_AvBin: {
+ HReg arg1 =3D iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg arg2 =3D iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst =3D newVRegV(env);
+ addInstr(env, PPC32Instr_AvBinary(op, dst, arg1, arg2));
+ return dst;
+ }
+
//.. case Iop_Add8x16: op =3D Xsse_ADD8; goto do_SseReRg;
-//.. case Iop_Add16x8: op =3D Xsse_ADD16; goto do_SseReRg;
-//.. case Iop_Add32x4: op =3D Xsse_ADD32; goto do_SseReRg;
//.. case Iop_Add64x2: op =3D Xsse_ADD64; goto do_SseReRg;
//.. case Iop_QAdd8Sx16: op =3D Xsse_QADD8S; goto do_SseReRg;
//.. case Iop_QAdd16Sx8: op =3D Xsse_QADD16S; goto do_SseReRg;
@@ -3319,31 +3325,74 @@
//.. case Iop_CmpGT8Sx16: op =3D Xsse_CMPGT8S; goto do_SseReRg;
//.. case Iop_CmpGT16Sx8: op =3D Xsse_CMPGT16S; goto do_SseReRg;
//.. case Iop_CmpGT32Sx4: op =3D Xsse_CMPGT32S; goto do_SseReRg;
-//.. case Iop_Max16Sx8: op =3D Xsse_MAX16S; goto do_SseReRg;
-//.. case Iop_Max8Ux16: op =3D Xsse_MAX8U; goto do_SseReRg;
-//.. case Iop_Min16Sx8: op =3D Xsse_MIN16S; goto do_SseReRg;
-//.. case Iop_Min8Ux16: op =3D Xsse_MIN8U; goto do_SseReRg;
-//.. case Iop_MulHi16Ux8: op =3D Xsse_MULHI16U; goto do_SseReRg;
-//.. case Iop_MulHi16Sx8: op =3D Xsse_MULHI16S; goto do_SseReRg;
//.. case Iop_Mul16x8: op =3D Xsse_MUL16; goto do_SseReRg;
-//.. case Iop_Sub8x16: op =3D Xsse_SUB8; goto do_SseReRg;
-//.. case Iop_Sub16x8: op =3D Xsse_SUB16; goto do_SseReRg;
-//.. case Iop_Sub32x4: op =3D Xsse_SUB32; goto do_SseReRg;
//.. case Iop_Sub64x2: op =3D Xsse_SUB64; goto do_SseReRg;
//.. case Iop_QSub8Sx16: op =3D Xsse_QSUB8S; goto do_SseReRg;
//.. case Iop_QSub16Sx8: op =3D Xsse_QSUB16S; goto do_SseReRg;
//.. case Iop_QSub8Ux16: op =3D Xsse_QSUB8U; goto do_SseReRg;
//.. case Iop_QSub16Ux8: op =3D Xsse_QSUB16U; goto do_SseReRg;
- do_AvBin: {
+
+ case Iop_Add8x16: op =3D Pav_ADDUM; goto do_AvBin8x16;
+ case Iop_QAdd8Ux16: op =3D Pav_ADDUS; goto do_AvBin8x16;
+ case Iop_QAdd8Sx16: op =3D Pav_ADDSS; goto do_AvBin8x16;
+ case Iop_Sub8x16: op =3D Pav_SUBUM; goto do_AvBin8x16;
+ case Iop_QSub8Ux16: op =3D Pav_SUBUS; goto do_AvBin8x16;
+ case Iop_QSub8Sx16: op =3D Pav_SUBSS; goto do_AvBin8x16;
+ case Iop_Avg8Ux16: op =3D Pav_AVGU; goto do_AvBin8x16;
+ case Iop_Avg8Sx16: op =3D Pav_AVGS; goto do_AvBin8x16;
+ case Iop_Max8Ux16: op =3D Pav_MAXU; goto do_AvBin8x16;
+ case Iop_Max8Sx16: op =3D Pav_MAXS; goto do_AvBin8x16;
+ case Iop_Min8Ux16: op =3D Pav_MINU; goto do_AvBin8x16;
+ case Iop_Min8Sx16: op =3D Pav_MINS; goto do_AvBin8x16;
+ do_AvBin8x16: {
HReg arg1 =3D iselVecExpr(env, e->Iex.Binop.arg1);
HReg arg2 =3D iselVecExpr(env, e->Iex.Binop.arg2);
HReg dst =3D newVRegV(env);
- addInstr(env, PPC32Instr_AvBinary(op, dst, arg1, arg2));
+ addInstr(env, PPC32Instr_AvBin8x16(op, dst, arg1, arg2));
return dst;
}
=20
- case Iop_Add32x4: op =3D Pav_ADDUM; goto do_AvBin32x4;
+ case Iop_Add16x8: op =3D Pav_ADDUM; goto do_AvBin16x8;
+ case Iop_QAdd16Ux8: op =3D Pav_ADDUS; goto do_AvBin16x8;
+ case Iop_QAdd16Sx8: op =3D Pav_ADDSS; goto do_AvBin16x8;
+ case Iop_Sub16x8: op =3D Pav_SUBUM; goto do_AvBin16x8;
+ case Iop_QSub16Ux8: op =3D Pav_SUBUS; goto do_AvBin16x8;
+ case Iop_QSub16Sx8: op =3D Pav_SUBSS; goto do_AvBin16x8;
+ case Iop_Avg16Ux8: op =3D Pav_AVGU; goto do_AvBin16x8;
+ case Iop_Avg16Sx8: op =3D Pav_AVGS; goto do_AvBin16x8;
+ case Iop_Max16Ux8: op =3D Pav_MAXU; goto do_AvBin16x8;
+ case Iop_Max16Sx8: op =3D Pav_MAXS; goto do_AvBin16x8;
+ case Iop_Min16Ux8: op =3D Pav_MINU; goto do_AvBin16x8;
+ case Iop_Min16Sx8: op =3D Pav_MINS; goto do_AvBin16x8;
+ case Iop_MulLo16Ux8: op =3D Pav_OMULU; goto do_AvBin16x8;
+ case Iop_MulLo16Sx8: op =3D Pav_OMULS; goto do_AvBin16x8;
+ case Iop_MulHi16Ux8: op =3D Pav_EMULU; goto do_AvBin16x8;
+ case Iop_MulHi16Sx8: op =3D Pav_EMULS; goto do_AvBin16x8;
+ do_AvBin16x8: {
+ HReg arg1 =3D iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg arg2 =3D iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst =3D newVRegV(env);
+ addInstr(env, PPC32Instr_AvBin16x8(op, dst, arg1, arg2));
+ return dst;
+ }
+
+ case Iop_Add32x4: op =3D Pav_ADDUM; goto do_AvBin32x4;
+ case Iop_QAdd32Ux4: op =3D Pav_ADDUS; goto do_AvBin32x4;
+ case Iop_QAdd32Sx4: op =3D Pav_ADDSS; goto do_AvBin32x4;
+ case Iop_Sub32x4: op =3D Pav_SUBUM; goto do_AvBin32x4;
+ case Iop_QSub32Ux4: op =3D Pav_SUBUS; goto do_AvBin32x4;
+ case Iop_QSub32Sx4: op =3D Pav_SUBSS; goto do_AvBin32x4;
+ case Iop_Avg32Ux4: op =3D Pav_AVGU; goto do_AvBin32x4;
+ case Iop_Avg32Sx4: op =3D Pav_AVGS; goto do_AvBin32x4;
+ case Iop_Max32Ux4: op =3D Pav_MAXU; goto do_AvBin32x4;
+ case Iop_Max32Sx4: op =3D Pav_MAXS; goto do_AvBin32x4;
+ case Iop_Min32Ux4: op =3D Pav_MINU; goto do_AvBin32x4;
+ case Iop_Min32Sx4: op =3D Pav_MINS; goto do_AvBin32x4;
case Iop_CmpGT32Ux4: op =3D Pav_CMPGTU; goto do_AvBin32x4;
+ case Iop_MulLo32Ux4: op =3D Pav_OMULU; goto do_AvBin32x4;
+ case Iop_MulLo32Sx4: op =3D Pav_OMULS; goto do_AvBin32x4;
+ case Iop_MulHi32Ux4: op =3D Pav_EMULU; goto do_AvBin32x4;
+ case Iop_MulHi32Sx4: op =3D Pav_EMULS; goto do_AvBin32x4;
do_AvBin32x4: {
HReg arg1 =3D iselVecExpr(env, e->Iex.Binop.arg1);
HReg arg2 =3D iselVecExpr(env, e->Iex.Binop.arg2);
@@ -3394,24 +3443,6 @@
return dst;
}
=20
-//.. do_SseShift: {
-//.. HReg greg =3D iselVecExpr(env, e->Iex.Binop.arg1);
-//.. X86RMI* rmi =3D iselIntExpr_RMI(env, e->Iex.Binop.arg2)=
;
-//.. X86AMode* esp0 =3D X86AMode_IR(0, hregX86_ESP());
-//.. HReg ereg =3D newVRegV(env);
-//.. HReg dst =3D newVRegV(env);
-//.. REQUIRE_SSE2;
-//.. addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
-//.. addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
-//.. addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
-//.. addInstr(env, X86Instr_Push(rmi));
-//.. addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
-//.. addInstr(env, mk_vMOVsd_RR(greg, dst));
-//.. addInstr(env, X86Instr_SseReRg(op, ereg, dst));
-//.. add_to_esp(env, 16);
-//.. return dst;
-//.. }
-
default:
break;
} /* switch (e->Iex.Binop.op) */
|