|
From: <sv...@va...> - 2005-11-04 19:44:51
|
Author: cerion
Date: 2005-11-04 19:44:48 +0000 (Fri, 04 Nov 2005)
New Revision: 1434
Log:
New irop Iop_MullEven*
- a widening un/signed multiply of even lanes
Recast misused irops Iop_MulLo/Hi* as Iop_MullEven*
Modified:
trunk/priv/guest-ppc32/toIR.c
trunk/priv/host-ppc32/hdefs.c
trunk/priv/host-ppc32/isel.c
trunk/priv/ir/irdefs.c
trunk/pub/libvex_ir.h
Modified: trunk/priv/guest-ppc32/toIR.c
===================================================================
--- trunk/priv/guest-ppc32/toIR.c 2005-11-04 14:34:52 UTC (rev 1433)
+++ trunk/priv/guest-ppc32/toIR.c 2005-11-04 19:44:48 UTC (rev 1434)
@@ -5388,42 +5388,50 @@
/* Multiply */
case 0x008: // vmuloub (Multiply Odd Unsigned Byte, AV p213)
DIP("vmuloub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_MulLo16Ux8, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_MullEven8Ux16,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(8)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(8)) ));
break;

case 0x048: // vmulouh (Multiply Odd Unsigned Half Word, AV p214)
DIP("vmulouh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_MulLo32Ux4, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_MullEven16Ux8,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) ));
break;

case 0x108: // vmulosb (Multiply Odd Signed Byte, AV p211)
DIP("vmulosb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_MulLo16Sx8, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_MullEven8Sx16,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(8)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(8)) ));
break;

case 0x148: // vmulosh (Multiply Odd Signed Half Word, AV p212)
DIP("vmulosh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_MulLo32Sx4, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_MullEven16Sx8,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) ));
break;

case 0x208: // vmuleub (Multiply Even Unsigned Byte, AV p209)
DIP("vmuleub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_MulHi16Ux8, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB)) );
break;

case 0x248: // vmuleuh (Multiply Even Unsigned Half Word, AV p210)
DIP("vmuleuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_MulHi32Ux4, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) );
break;

case 0x308: // vmulesb (Multiply Even Signed Byte, AV p207)
DIP("vmulesb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_MulHi16Sx8, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_MullEven8Sx16, mkexpr(vA), mkexpr(vB)) );
break;

case 0x348: // vmulesh (Multiply Even Signed Half Word, AV p208)
DIP("vmulesh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_MulHi32Sx4, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) );
break;


@@ -5648,13 +5656,17 @@

assign( zLo, binop(Iop_Add32x4,
binop(Iop_SarN32x4,
- binop(Iop_MulLo32Sx4, mkexpr(aLo), mkexpr(bLo)),
+ binop(Iop_MullEven16Sx8,
+ binop(Iop_ShlV128, mkexpr(aLo), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(bLo), mkU8(16)) ),
mkU8(15)),
mkexpr(cLo)) );

assign( zHi, binop(Iop_Add32x4,
binop(Iop_SarN32x4,
- binop(Iop_MulLo32Sx4, mkexpr(aHi), mkexpr(bHi)),
+ binop(Iop_MullEven16Sx8,
+ binop(Iop_ShlV128, mkexpr(aHi), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(bHi), mkU8(16)) ),
mkU8(15)),
mkexpr(cHi)) );

@@ -5688,14 +5700,18 @@
assign( zLo, binop(Iop_Add32x4,
binop(Iop_SarN32x4,
binop(Iop_Add32x4, mkexpr(zKonst),
- binop(Iop_MulLo32Sx4, mkexpr(aLo), mkexpr(bLo))),
+ binop(Iop_MullEven16Sx8,
+ binop(Iop_ShlV128, mkexpr(aLo), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(bLo), mkU8(16)) )),
mkU8(15)),
mkexpr(cLo)) );

assign( zHi, binop(Iop_Add32x4,
binop(Iop_SarN32x4,
binop(Iop_Add32x4, mkexpr(zKonst),
- binop(Iop_MulLo32Sx4, mkexpr(aHi), mkexpr(bHi))),
+ binop(Iop_MullEven16Sx8,
+ binop(Iop_ShlV128, mkexpr(aHi), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(bHi), mkU8(16)) )),
mkU8(15)),
mkexpr(cHi)) );

@@ -5719,10 +5735,14 @@
assign( bHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vB)) );
assign( cHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vC)) );
assign( zLo, binop(Iop_Add32x4,
- binop(Iop_MulLo32Ux4, mkexpr(aLo), mkexpr(bLo)),
+ binop(Iop_MullEven16Ux8,
+ binop(Iop_ShlV128, mkexpr(aLo), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(bLo), mkU8(16)) ),
mkexpr(cLo)) );
assign( zHi, binop(Iop_Add32x4,
- binop(Iop_MulLo32Ux4, mkexpr(aHi), mkexpr(bHi)),
+ binop(Iop_MullEven16Ux8,
+ binop(Iop_ShlV128, mkexpr(aHi), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(bHi), mkU8(16)) ),
mkexpr(cHi)) );
putVReg( vD_addr, binop(Iop_Narrow32Ux4, mkexpr(zHi), mkexpr(zLo)) );
break;
@@ -5739,15 +5759,21 @@
IRTemp even_odd = newTemp(Ity_V128);
IRTemp even_even = newTemp(Ity_V128);
DIP("vmsumubm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
- assign( odd, binop(Iop_MulLo16Ux8, mkexpr(vA), mkexpr(vB)) );
- assign( even, binop(Iop_MulHi16Ux8, mkexpr(vA), mkexpr(vB)) );
+ assign( odd, binop(Iop_MullEven8Ux16,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(8)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(8)) ));
+ assign( even, binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB)) );
/* zKonst just used to separate the lanes out */
assign( zKonst, unop(Iop_Dup16x8, mkU16(0x1)) );

- assign( odd_odd, binop(Iop_MulLo32Ux4, mkexpr(odd), mkexpr(zKonst)) );
- assign( odd_even, binop(Iop_MulHi32Ux4, mkexpr(odd), mkexpr(zKonst)) );
- assign( even_odd, binop(Iop_MulLo32Ux4, mkexpr(even), mkexpr(zKonst)) );
- assign( even_even, binop(Iop_MulHi32Ux4, mkexpr(even), mkexpr(zKonst)) );
+ assign( odd_odd, binop(Iop_MullEven16Ux8,
+ binop(Iop_ShlV128, mkexpr(odd), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(zKonst), mkU8(16)) ));
+ assign( odd_even, binop(Iop_MullEven16Ux8, mkexpr(odd), mkexpr(zKonst)) );
+ assign( even_odd, binop(Iop_MullEven16Ux8,
+ binop(Iop_ShlV128, mkexpr(even), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(zKonst), mkU8(16)) ));
+ assign( even_even, binop(Iop_MullEven16Ux8, mkexpr(even), mkexpr(zKonst)) );

putVReg( vD_addr,
binop(Iop_Add32x4, mkexpr(vC),
@@ -5765,8 +5791,10 @@
IRTemp odd = newTemp(Ity_V128);
IRTemp even = newTemp(Ity_V128);
DIP("vmsumuhm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
- assign( odd, binop(Iop_MulLo32Ux4, mkexpr(vA), mkexpr(vB)) );
- assign( even, binop(Iop_MulHi32Ux4, mkexpr(vA), mkexpr(vB)) );
+ assign( odd, binop(Iop_MullEven16Ux8,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) ));
+ assign( even, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) );
putVReg( vD_addr,
binop(Iop_Add32x4, mkexpr(vC),
binop(Iop_Add32x4, mkexpr(odd), mkexpr(even))) );
@@ -5781,8 +5809,10 @@
IRTemp odd = newTemp(Ity_V128);
IRTemp even = newTemp(Ity_V128);
DIP("vmsumshm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
- assign( odd, binop(Iop_MulLo32Sx4, mkexpr(vA), mkexpr(vB)) );
- assign( even, binop(Iop_MulHi32Sx4, mkexpr(vA), mkexpr(vB)) );
+ assign( odd, binop(Iop_MullEven16Sx8,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(16)),
+ binop(Iop_ShlV128, mkexpr(vB), mkU8(16)) ));
+ assign( even, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) );
putVReg( vD_addr,
binop(Iop_Add32x4, mkexpr(vC),
binop(Iop_Add32x4, mkexpr(odd), mkexpr(even))) );
Modified: trunk/priv/host-ppc32/hdefs.c
===================================================================
--- trunk/priv/host-ppc32/hdefs.c 2005-11-04 14:34:52 UTC (rev 1433)
+++ trunk/priv/host-ppc32/hdefs.c 2005-11-04 19:44:48 UTC (rev 1434)
@@ -2910,6 +2910,11 @@
case Pav_QSUBU: opc2 = 1536; break; // vsububs
case Pav_QSUBS: opc2 = 1792; break; // vsubsbs

+ case Pav_OMULU: opc2 = 8; break; // vmuloub
+ case Pav_OMULS: opc2 = 264; break; // vmulosb
+ case Pav_EMULU: opc2 = 520; break; // vmuleub
+ case Pav_EMULS: opc2 = 776; break; // vmulesb
+
case Pav_AVGU: opc2 = 1026; break; // vavgub
case Pav_AVGS: opc2 = 1282; break; // vavgsb
case Pav_MAXU: opc2 = 2; break; // vmaxub
@@ -2951,10 +2956,10 @@
case Pav_QSUBU: opc2 = 1600; break; // vsubuhs
case Pav_QSUBS: opc2 = 1856; break; // vsubshs

- case Pav_OMULU: opc2 = 8; break; // vmuloub
- case Pav_OMULS: opc2 = 264; break; // vmulosb
- case Pav_EMULU: opc2 = 520; break; // vmuleub
- case Pav_EMULS: opc2 = 776; break; // vmulesb
+ case Pav_OMULU: opc2 = 72; break; // vmulouh
+ case Pav_OMULS: opc2 = 328; break; // vmulosh
+ case Pav_EMULU: opc2 = 584; break; // vmuleuh
+ case Pav_EMULS: opc2 = 840; break; // vmulesh

case Pav_AVGU: opc2 = 1090; break; // vavguh
case Pav_AVGS: opc2 = 1346; break; // vavgsh
@@ -3003,11 +3008,6 @@
case Pav_QSUBU: opc2 = 1664; break; // vsubuws
case Pav_QSUBS: opc2 = 1920; break; // vsubsws

- case Pav_OMULU: opc2 = 72; break; // vmulouh
- case Pav_OMULS: opc2 = 328; break; // vmulosh
- case Pav_EMULU: opc2 = 584; break; // vmuleuh
- case Pav_EMULS: opc2 = 840; break; // vmulesh
-
case Pav_AVGU: opc2 = 1154; break; // vavguw
case Pav_AVGS: opc2 = 1410; break; // vavgsw

Modified: trunk/priv/host-ppc32/isel.c
===================================================================
--- trunk/priv/host-ppc32/isel.c 2005-11-04 14:34:52 UTC (rev 1433)
+++ trunk/priv/host-ppc32/isel.c 2005-11-04 19:44:48 UTC (rev 1434)
@@ -3315,6 +3315,8 @@
case Iop_Max8Sx16: op = Pav_MAXS; goto do_AvBin8x16;
case Iop_Min8Ux16: op = Pav_MINU; goto do_AvBin8x16;
case Iop_Min8Sx16: op = Pav_MINS; goto do_AvBin8x16;
+ case Iop_MullEven8Ux16: op = Pav_EMULU; goto do_AvBin8x16;
+ case Iop_MullEven8Sx16: op = Pav_EMULS; goto do_AvBin8x16;
case Iop_CmpEQ8x16: op = Pav_CMPEQU; goto do_AvBin8x16;
case Iop_CmpGT8Ux16: op = Pav_CMPGTU; goto do_AvBin8x16;
case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16;
@@ -3347,10 +3349,8 @@
case Iop_Max16Sx8: op = Pav_MAXS; goto do_AvBin16x8;
case Iop_Min16Ux8: op = Pav_MINU; goto do_AvBin16x8;
case Iop_Min16Sx8: op = Pav_MINS; goto do_AvBin16x8;
- case Iop_MulLo16Ux8: op = Pav_OMULU; goto do_AvBin16x8;
- case Iop_MulLo16Sx8: op = Pav_OMULS; goto do_AvBin16x8;
- case Iop_MulHi16Ux8: op = Pav_EMULU; goto do_AvBin16x8;
- case Iop_MulHi16Sx8: op = Pav_EMULS; goto do_AvBin16x8;
+ case Iop_MullEven16Ux8: op = Pav_EMULU; goto do_AvBin16x8;
+ case Iop_MullEven16Sx8: op = Pav_EMULS; goto do_AvBin16x8;
case Iop_CmpEQ16x8: op = Pav_CMPEQU; goto do_AvBin16x8;
case Iop_CmpGT16Ux8: op = Pav_CMPGTU; goto do_AvBin16x8;
case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8;
@@ -3383,10 +3383,6 @@
case Iop_Max32Sx4: op = Pav_MAXS; goto do_AvBin32x4;
case Iop_Min32Ux4: op = Pav_MINU; goto do_AvBin32x4;
case Iop_Min32Sx4: op = Pav_MINS; goto do_AvBin32x4;
- case Iop_MulLo32Ux4: op = Pav_OMULU; goto do_AvBin32x4;
- case Iop_MulLo32Sx4: op = Pav_OMULS; goto do_AvBin32x4;
- case Iop_MulHi32Ux4: op = Pav_EMULU; goto do_AvBin32x4;
- case Iop_MulHi32Sx4: op = Pav_EMULS; goto do_AvBin32x4;
case Iop_CmpEQ32x4: op = Pav_CMPEQU; goto do_AvBin32x4;
case Iop_CmpGT32Ux4: op = Pav_CMPGTU; goto do_AvBin32x4;
case Iop_CmpGT32Sx4: op = Pav_CMPGTS; goto do_AvBin32x4;
Modified: trunk/priv/ir/irdefs.c
===================================================================
--- trunk/priv/ir/irdefs.c 2005-11-04 14:34:52 UTC (rev 1433)
+++ trunk/priv/ir/irdefs.c 2005-11-04 19:44:48 UTC (rev 1434)
@@ -444,6 +444,11 @@
case Iop_MulHi16Sx8: vex_printf("MulHi16Sx8"); return;
case Iop_MulHi32Sx4: vex_printf("MulHi32Sx4"); return;

+ case Iop_MullEven8Ux16: vex_printf("MullEven8Ux16"); return;
+ case Iop_MullEven16Ux8: vex_printf("MullEven16Ux8"); return;
+ case Iop_MullEven8Sx16: vex_printf("MullEven8Sx16"); return;
+ case Iop_MullEven16Sx8: vex_printf("MullEven16Sx8"); return;
+
case Iop_Avg8Ux16: vex_printf("Avg8Ux16"); return;
case Iop_Avg16Ux8: vex_printf("Avg16Ux8"); return;
case Iop_Avg32Ux4: vex_printf("Avg32Ux4"); return;
@@ -1556,6 +1561,8 @@
case Iop_MulLo16Sx8: case Iop_MulLo32Sx4:
case Iop_MulHi16Ux8: case Iop_MulHi32Ux4:
case Iop_MulHi16Sx8: case Iop_MulHi32Sx4:
+ case Iop_MullEven8Ux16: case Iop_MullEven16Ux8:
+ case Iop_MullEven8Sx16: case Iop_MullEven16Sx8:
case Iop_Avg8Ux16: case Iop_Avg16Ux8: case Iop_Avg32Ux4:
case Iop_Avg8Sx16: case Iop_Avg16Sx8: case Iop_Avg32Sx4:
case Iop_Max8Sx16: case Iop_Max16Sx8: case Iop_Max32Sx4:
Modified: trunk/pub/libvex_ir.h
===================================================================
--- trunk/pub/libvex_ir.h 2005-11-04 14:34:52 UTC (rev 1433)
+++ trunk/pub/libvex_ir.h 2005-11-04 19:44:48 UTC (rev 1434)
@@ -554,6 +554,10 @@
Iop_MulHi16Ux8, Iop_MulHi32Ux4,
Iop_MulHi16Sx8, Iop_MulHi32Sx4,

+ /* (widening signed/unsigned of even lanes) */
+ Iop_MullEven8Ux16, Iop_MullEven16Ux8,
+ Iop_MullEven8Sx16, Iop_MullEven16Sx8,

+
/* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,
|