|
From: <sv...@va...> - 2005-09-15 21:58:55
|
Author: cerion
Date: 2005-09-15 22:58:50 +0100 (Thu, 15 Sep 2005)
New Revision: 1399
Log:
Added AltiVec permutation insns:
- vperm, vsldoi, vmrg*, vsplt*
Modified:
trunk/priv/guest-ppc32/toIR.c
trunk/priv/host-ppc32/hdefs.c
trunk/priv/host-ppc32/hdefs.h
trunk/priv/host-ppc32/isel.c
Modified: trunk/priv/guest-ppc32/toIR.c
===================================================================
--- trunk/priv/guest-ppc32/toIR.c 2005-09-15 16:28:36 UTC (rev 1398)
+++ trunk/priv/guest-ppc32/toIR.c 2005-09-15 21:58:50 UTC (rev 1399)
@@ -335,12 +335,10 @@
return toUChar((((Int)x) << 27) >> 27);
}
=20
-//zz #if 0
-//zz static UInt extend_s_8to32( UInt x )
-//zz {
-//zz return (UInt)((((Int)x) << 24) >> 24);
-//zz }
-//zz #endif
+static UInt extend_s_8to32( UChar x )
+{
+ return (UInt)((((Int)x) << 24) >> 24);
+}
=20
static UInt extend_s_16to32 ( UInt x )
{
@@ -406,6 +404,11 @@
return IRExpr_Const(IRConst_U8(i));
}
=20
+static IRExpr* mkU16 ( UInt i )
+{
+ return IRExpr_Const(IRConst_U16(i));
+}
+
static IRExpr* mkU32 ( UInt i )
{
return IRExpr_Const(IRConst_U32(i));
@@ -5774,19 +5777,40 @@
binop(Iop_AndV128, mkexpr(vB), mkexpr(vC))) );
return True;
=20
- case 0x2B: // vperm (Permute, AV p218)
- DIP("vperm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr)=
;
- DIP(" =3D> not implemented\n");
- return False;
-
+ case 0x2B: { // vperm (Permute, AV p218)
+ DIP("vperma v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr=
);
+ /* limited to two args for IR, so have to play games... */
+ IRTemp a_perm =3D newTemp(Ity_V128);
+ IRTemp b_perm =3D newTemp(Ity_V128);
+ IRTemp mask =3D newTemp(Ity_V128);
+ assign( a_perm, binop(Iop_Perm, mkexpr(vA), mkexpr(vC)) );
+ assign( b_perm, binop(Iop_Perm, mkexpr(vB), mkexpr(vC)) );
+ // mask[i8] =3D (vC[i8]_4 =3D=3D 1) ? 0xFF : 0x0
+ assign( mask, binop(Iop_SarN8x16,
+ binop(Iop_ShlN8x16, mkexpr(vC), mkU8(3)),
+ mkU8(7)) );
+ // dst =3D (a & ~mask) | (b & mask)
+ putVReg( vD_addr, binop(Iop_OrV128,
+ binop(Iop_AndV128, mkexpr(a_perm),
+ unop(Iop_NotV128, mkexpr(mask))),
+ binop(Iop_AndV128, mkexpr(b_perm),
+ mkexpr(mask))) );
+ return True;
+ }
case 0x2C: // vsldoi (Shift Left Double by Octet Imm, AV p241)
if (b10 !=3D 0) {
vex_printf("dis_av_permute(PPC32)(vsldoi)\n");
return False;
}
DIP("vsldoi v%d,v%d,v%d,%d\n", vD_addr, vA_addr, vB_addr, SHB_uimm=
4);
- DIP(" =3D> not implemented\n");
- return False;
+ if (SHB_uimm4 =3D=3D 0)
+ putVReg( vD_addr, mkexpr(vA) );
+ else
+ putVReg( vD_addr,
+ binop(Iop_OrV128,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(SHB_uimm4*8)=
),
+ binop(Iop_ShrV128, mkexpr(vB), mkU8((16-SHB_uimm=
4)*8))) );
+ return True;
=20
default:
break; // Fall through...
@@ -5798,49 +5822,63 @@
/* Merge */
case 0x00C: // vmrghb (Merge High B, AV p195)
DIP("vmrghb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr,
+ binop(Iop_InterleaveHI8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x04C: // vmrghh (Merge High HW, AV p196)
DIP("vmrghh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr,
+ binop(Iop_InterleaveHI16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x08C: // vmrghw (Merge High W, AV p197)
DIP("vmrghw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr,
+ binop(Iop_InterleaveHI32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x10C: // vmrglb (Merge Low B, AV p198)
DIP("vmrglb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr,
+ binop(Iop_InterleaveLO8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x14C: // vmrglh (Merge Low HW, AV p199)
DIP("vmrglh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr,
+ binop(Iop_InterleaveLO16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
case 0x18C: // vmrglw (Merge Low W, AV p200)
DIP("vmrglw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr,
+ binop(Iop_InterleaveLO32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
=20
+
/* Splat */
- case 0x20C: // vspltb (Splat Byte, AV p245)
+ case 0x20C: { // vspltb (Splat Byte, AV p245)
DIP("vspltb v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
- DIP(" =3D> not implemented\n");
- return False;
-
- case 0x24C: // vsplth (Splat Half Word, AV p246)
+ /* vD =3D Dup8x16( vB[UIMM_5] ) */
+ UChar sh_uimm =3D (15-UIMM_5)*8;
+ putVReg( vD_addr, unop(Iop_Dup8x16,
+ unop(Iop_32to8, unop(Iop_V128to32,=20
+ binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm))))) );
+ break;
+ }
+ case 0x24C: { // vsplth (Splat Half Word, AV p246)
DIP("vsplth v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
- DIP(" =3D> not implemented\n");
- return False;
-
+ UChar sh_uimm =3D (7-UIMM_5)*16;
+ putVReg( vD_addr, unop(Iop_Dup16x8,
+ unop(Iop_32to16, unop(Iop_V128to32,=20
+ binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm))))) );
+ break;
+ }
case 0x28C: { // vspltw (Splat Word, AV p250)
DIP("vspltw v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
/* vD =3D Dup32x4( vB[UIMM_5] ) */
- unsigned int sh_uimm =3D (3-UIMM_5)*32;
+ UChar sh_uimm =3D (3-UIMM_5)*32;
putVReg( vD_addr, unop(Iop_Dup32x4,
unop(Iop_V128to32,
binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm)))) );
@@ -5848,18 +5886,18 @@
}
case 0x30C: // vspltisb (Splat Immediate Signed B, AV p247)
DIP("vspltisb v%d,%d\n", vD_addr, (Char)SIMM_8);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, unop(Iop_Dup8x16, mkU8(SIMM_8)) );
+ break;
=20
case 0x34C: // vspltish (Splat Immediate Signed HW, AV p248)
DIP("vspltish v%d,%d\n", vD_addr, (Char)SIMM_8);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, unop(Iop_Dup16x8, mkU16(extend_s_8to32(SIMM_8)))=
);
+ break;
=20
case 0x38C: // vspltisw (Splat Immediate Signed W, AV p249)
DIP("vspltisw v%d,%d\n", vD_addr, (Char)SIMM_8);
- DIP(" =3D> not implemented\n");
- return False;
+ putVReg( vD_addr, unop(Iop_Dup32x4, mkU32(extend_s_8to32(SIMM_8)))=
);
+ break;
=20
default:
vex_printf("dis_av_permute(PPC32)(opc2)\n");
@@ -6614,14 +6652,10 @@
/* AV Permutations */
case 0x2A: // vsel
case 0x2B: // vperm
+ case 0x2C: // vsldoi
if (dis_av_permute( theInstr )) goto decode_success;
goto decode_failure;
=20
- /* AV Shift */
- case 0x2C: // vsldoi
- if (dis_av_shift( theInstr )) goto decode_success;
- goto decode_failure;
-
/* AV Floating Point Mult-Add/Sub */
case 0x2E: case 0x2F: // vmaddfp, vnmsubfp
if (dis_av_fp_arith( theInstr )) goto decode_success;
Modified: trunk/priv/host-ppc32/hdefs.c
===================================================================
--- trunk/priv/host-ppc32/hdefs.c 2005-09-15 16:28:36 UTC (rev 1398)
+++ trunk/priv/host-ppc32/hdefs.c 2005-09-15 21:58:50 UTC (rev 1399)
@@ -973,13 +973,13 @@
i->Pin.AvBin32Fx4.srcR =3D srcR;
return i;
}
-PPC32Instr* PPC32Instr_AvPerm ( HReg ctl, HReg dst, HReg srcL, HReg srcR=
) {
+PPC32Instr* PPC32Instr_AvPerm ( HReg dst, HReg srcL, HReg srcR, HReg ctl=
) {
PPC32Instr* i =3D LibVEX_Alloc(sizeof(PPC32Instr));
i->tag =3D Pin_AvPerm;
- i->Pin.AvPerm.ctl =3D ctl;
i->Pin.AvPerm.dst =3D dst;
i->Pin.AvPerm.srcL =3D srcL;
i->Pin.AvPerm.srcR =3D srcR;
+ i->Pin.AvPerm.ctl =3D ctl;
return i;
}
PPC32Instr* PPC32Instr_AvSel ( HReg ctl, HReg dst, HReg srcL, HReg srcR =
) {
@@ -1636,9 +1636,9 @@
return;
case Pin_AvPerm:
addHRegUse(u, HRmWrite, i->Pin.AvPerm.dst);
- addHRegUse(u, HRmRead, i->Pin.AvPerm.ctl);
addHRegUse(u, HRmRead, i->Pin.AvPerm.srcL);
addHRegUse(u, HRmRead, i->Pin.AvPerm.srcR);
+ addHRegUse(u, HRmRead, i->Pin.AvPerm.ctl);
return;
case Pin_AvSel:
addHRegUse(u, HRmWrite, i->Pin.AvSel.dst);
@@ -3088,10 +3088,10 @@
}
=20
case Pin_AvPerm: { // vperm
- UInt v_ctl =3D vregNo(i->Pin.AvPerm.ctl);
UInt v_dst =3D vregNo(i->Pin.AvPerm.dst);
UInt v_srcL =3D vregNo(i->Pin.AvPerm.srcL);
UInt v_srcR =3D vregNo(i->Pin.AvPerm.srcR);
+ UInt v_ctl =3D vregNo(i->Pin.AvPerm.ctl);
p =3D mkFormVA( p, 4, v_dst, v_srcL, v_srcR, v_ctl, 43 );
goto done;
}
Modified: trunk/priv/host-ppc32/hdefs.h
===================================================================
--- trunk/priv/host-ppc32/hdefs.h 2005-09-15 16:28:36 UTC (rev 1398)
+++ trunk/priv/host-ppc32/hdefs.h 2005-09-15 21:58:50 UTC (rev 1399)
@@ -670,16 +670,16 @@
} AvBin32Fx4;
/* Perm,Sel,SlDbl,Splat are all weird AV permutations */
struct {
- HReg ctl;
HReg dst;
HReg srcL;
HReg srcR;
+ HReg ctl;
} AvPerm;
struct {
- HReg ctl;
HReg dst;
HReg srcL;
HReg srcR;
+ HReg ctl;
} AvSel;
struct {
UChar shift;
@@ -742,7 +742,7 @@
extern PPC32Instr* PPC32Instr_AvBin16x8 ( PPC32AvOp op, HReg dst, HReg =
srcL, HReg srcR );
extern PPC32Instr* PPC32Instr_AvBin32x4 ( PPC32AvOp op, HReg dst, HReg =
srcL, HReg srcR );
extern PPC32Instr* PPC32Instr_AvBin32Fx4 ( PPC32AvOp op, HReg dst, HReg =
srcL, HReg srcR );
-extern PPC32Instr* PPC32Instr_AvPerm ( HReg ctl, HReg dst, HReg srcL=
, HReg srcR );
+extern PPC32Instr* PPC32Instr_AvPerm ( HReg dst, HReg srcL, HReg src=
R, HReg ctl );
extern PPC32Instr* PPC32Instr_AvSel ( HReg ctl, HReg dst, HReg srcL=
, HReg srcR );
extern PPC32Instr* PPC32Instr_AvShlDbl ( UChar shift, HReg dst, HReg s=
rcL, HReg srcR );
extern PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32VI5s=
* src );
Modified: trunk/priv/host-ppc32/isel.c
===================================================================
--- trunk/priv/host-ppc32/isel.c 2005-09-15 16:28:36 UTC (rev 1398)
+++ trunk/priv/host-ppc32/isel.c 2005-09-15 21:58:50 UTC (rev 1399)
@@ -3127,10 +3127,10 @@
//.. return dst;
//.. }
=20
- case Iop_Dup32x4: {
- HReg dst =3D mk_AvDuplicateRI(env, e->Iex.Binop.arg1);
- return dst;
- }
+ case Iop_Dup8x16:
+ case Iop_Dup16x8:
+ case Iop_Dup32x4:
+ return mk_AvDuplicateRI(env, e->Iex.Binop.arg1);
=20
default:
break;
@@ -3332,6 +3332,8 @@
//.. case Iop_QSub8Ux16: op =3D Xsse_QSUB8U; goto do_SseReRg;
//.. case Iop_QSub16Ux8: op =3D Xsse_QSUB16U; goto do_SseReRg;
=20
+ case Iop_InterleaveHI8x16: op =3D Pav_MRGHI; goto do_AvBin8x16;
+ case Iop_InterleaveLO8x16: op =3D Pav_MRGLO; goto do_AvBin8x16;
case Iop_Add8x16: op =3D Pav_ADDUM; goto do_AvBin8x16;
case Iop_QAdd8Ux16: op =3D Pav_ADDUS; goto do_AvBin8x16;
case Iop_QAdd8Sx16: op =3D Pav_ADDSS; goto do_AvBin8x16;
@@ -3355,6 +3357,8 @@
return dst;
}
=20
+ case Iop_InterleaveHI16x8: op =3D Pav_MRGHI; goto do_AvBin16x8;
+ case Iop_InterleaveLO16x8: op =3D Pav_MRGLO; goto do_AvBin16x8;
case Iop_Add16x8: op =3D Pav_ADDUM; goto do_AvBin16x8;
case Iop_QAdd16Ux8: op =3D Pav_ADDUS; goto do_AvBin16x8;
case Iop_QAdd16Sx8: op =3D Pav_ADDSS; goto do_AvBin16x8;
@@ -3382,6 +3386,8 @@
return dst;
}
=20
+ case Iop_InterleaveHI32x4: op =3D Pav_MRGHI; goto do_AvBin32x4;
+ case Iop_InterleaveLO32x4: op =3D Pav_MRGLO; goto do_AvBin32x4;
case Iop_Add32x4: op =3D Pav_ADDUM; goto do_AvBin32x4;
case Iop_QAdd32Ux4: op =3D Pav_ADDUS; goto do_AvBin32x4;
case Iop_QAdd32Sx4: op =3D Pav_ADDSS; goto do_AvBin32x4;
@@ -3433,9 +3439,19 @@
//.. case Iop_ShrN16x8: op =3D Xsse_SHR16; goto do_SseShift;
//.. case Iop_ShrN64x2: op =3D Xsse_SHR64; goto do_SseShift;
=20
+ case Iop_ShlN8x16: op =3D Pav_SHL; goto do_AvShift8x16;
+ case Iop_SarN8x16: op =3D Pav_SAR; goto do_AvShift8x16;
+ do_AvShift8x16: {
+ HReg r_src =3D iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg dst =3D newVRegV(env);
+ HReg v_shft =3D mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+ addInstr(env, PPC32Instr_AvBin8x16(op, dst, r_src, v_shft));
+ return dst;
+ }
+
case Iop_ShrN32x4: op =3D Pav_SHR; goto do_AvShift32x4;
do_AvShift32x4: {
- HReg r_src =3D iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg r_src =3D iselVecExpr(env, e->Iex.Binop.arg1);
HReg dst =3D newVRegV(env);
HReg v_shft =3D mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
addInstr(env, PPC32Instr_AvBin32x4(op, dst, r_src, v_shft));
@@ -3443,14 +3459,24 @@
}
=20
case Iop_ShrV128: op =3D Pav_SHR; goto do_AvShiftV128;
+ case Iop_ShlV128: op =3D Pav_SHL; goto do_AvShiftV128;
do_AvShiftV128: {
HReg dst =3D newVRegV(env);
HReg r_src =3D iselVecExpr(env, e->Iex.Binop.arg1);
HReg v_shft =3D mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+ /* Note: shift value gets masked by 127 */
addInstr(env, PPC32Instr_AvBinary(op, dst, r_src, v_shft));
return dst;
}
=20
+ case Iop_Perm: {
+ HReg dst =3D newVRegV(env);
+ HReg v_src =3D iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg v_ctl =3D iselVecExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, PPC32Instr_AvPerm(dst, v_src, v_src, v_ctl));
+ return dst;
+ }
+
default:
break;
} /* switch (e->Iex.Binop.op) */
|