|
From: <sv...@va...> - 2005-09-14 20:35:49
|
Author: cerion
Date: 2005-09-14 21:35:47 +0100 (Wed, 14 Sep 2005)
New Revision: 1392
Log:
more altivec insns: vsr, vspltw
- only working with --tool=none
back-end:
hdefs:
new type for PPC32Instr_AvSplat:
PPC32VI5s => {vector-reg | signed-5bit-imm}
fixed ShlV128, ShrV128 to shift the full 128bits
isel:
implemented Iop_Dup32x4, Iop_ShrV128
new function mk_AvDuplicateRI()
- takes in ri_src (imm|reg, latter of type 8|16|32)
returns vector reg of duplicated lanes of ri_src
avoids store/load for immediates up to simm6.
Modified:
trunk/priv/guest-ppc32/toIR.c
trunk/priv/host-ppc32/hdefs.c
trunk/priv/host-ppc32/hdefs.h
trunk/priv/host-ppc32/isel.c
Modified: trunk/priv/guest-ppc32/toIR.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-ppc32/toIR.c 2005-09-13 18:41:09 UTC (rev 1391)
+++ trunk/priv/guest-ppc32/toIR.c 2005-09-14 20:35:47 UTC (rev 1392)
@@ -5518,6 +5518,11 @@
UChar vB_addr =3D toUChar((theInstr >> 11) & 0x1F); /* theInstr[11:15=
] */
UInt opc2 =3D (theInstr >> 0) & 0x7FF; /* theInstr[0:10]=
*/
=20
+ IRTemp vA =3D newTemp(Ity_V128);
+ IRTemp vB =3D newTemp(Ity_V128);
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+
if (opc1 !=3D 0x4){
vex_printf("dis_av_shift(PPC32)(instr)\n");
return False;
@@ -5583,11 +5588,16 @@
DIP(" =3D> not implemented\n");
return False;
=20
- case 0x2C4: // vsr (Shift Right, AV p252)
+ case 0x2C4: { // vsr (Shift Right, AV p251)
DIP("vsr v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" =3D> not implemented\n");
- return False;
-
+ IRTemp sh =3D newTemp(Ity_I8);
+ assign( sh, binop(Iop_And8, mkU8(0x7),
+ unop(Iop_32to8,
+ unop(Iop_V128to32, mkexpr(vB)))) );
+ putVReg( vD_addr,
+ binop(Iop_ShrV128, mkexpr(vA), mkexpr(sh)) );
+ break;
+ }
case 0x304: // vsrab (Shift Right Algebraic B, AV p253)
DIP("vsrab v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
DIP(" =3D> not implemented\n");
@@ -5717,11 +5727,15 @@
DIP(" =3D> not implemented\n");
return False;
=20
- case 0x28C: // vspltw (Splat Word, AV p250)
+ case 0x28C: { // vspltw (Splat Word, AV p250)
DIP("vspltw v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
- DIP(" =3D> not implemented\n");
- return False;
-
+ /* vD =3D Dup32x4( vB[UIMM_5] ) */
+ unsigned int sh_uimm =3D (3-UIMM_5)*32;
+ putVReg( vD_addr, unop(Iop_Dup32x4,
+ unop(Iop_V128to32,
+ binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm)))) );
+ break;
+ }
case 0x30C: // vspltisb (Splat Immediate Signed B, AV p247)
DIP("vspltisb v%d,%d\n", vD_addr, (Char)SIMM_8);
DIP(" =3D> not implemented\n");
Modified: trunk/priv/host-ppc32/hdefs.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-ppc32/hdefs.c 2005-09-13 18:41:09 UTC (rev 1391)
+++ trunk/priv/host-ppc32/hdefs.c 2005-09-14 20:35:47 UTC (rev 1392)
@@ -529,6 +529,64 @@
}
=20
=20
+/* --------- Operand, which can be a vector reg or a simm5. --------- */
+
+PPC32VI5s* PPC32VI5s_Imm ( Char simm5 ) {
+ PPC32VI5s* op =3D LibVEX_Alloc(sizeof(PPC32VI5s));
+ op->tag =3D Pvi_Imm;
+ op->Pvi.Imm5s =3D simm5;
+ vassert(simm5 >=3D -16 && simm5 <=3D 15);
+ return op;
+}
+PPC32VI5s* PPC32VI5s_Reg ( HReg reg ) {
+ PPC32VI5s* op =3D LibVEX_Alloc(sizeof(PPC32VI5s));
+ op->tag =3D Pvi_Reg;
+ op->Pvi.Reg =3D reg;
+ vassert(hregClass(reg) =3D=3D HRcVec128);
+ return op;
+}
+
+void ppPPC32VI5s ( PPC32VI5s* src ) {
+ switch (src->tag) {
+ case Pvi_Imm:=20
+ vex_printf("%d", (Int)src->Pvi.Imm5s);
+ break;
+ case Pvi_Reg:=20
+ ppHRegPPC32(src->Pvi.Reg);
+ break;
+ default:=20
+ vpanic("ppPPC32VI5s");
+ }
+}
+
+/* A PPC32VI5s can only be used in a "read" context (what would it
+ mean to write or modify a literal?) and so we enumerate its
+ registers accordingly. */
+static void addRegUsage_PPC32VI5s ( HRegUsage* u, PPC32VI5s* dst ) {
+ switch (dst->tag) {
+ case Pvi_Imm:=20
+ return;
+ case Pvi_Reg:=20
+ addHRegUse(u, HRmRead, dst->Pvi.Reg);
+ return;
+ default:=20
+ vpanic("addRegUsage_PPC32VI5s");
+ }
+}
+
+static void mapRegs_PPC32VI5s ( HRegRemap* m, PPC32VI5s* dst ) {
+ switch (dst->tag) {
+ case Pvi_Imm:=20
+ return;
+ case Pvi_Reg:=20
+ dst->Pvi.Reg =3D lookupHRegRemap(m, dst->Pvi.Reg);
+ return;
+ default:=20
+ vpanic("mapRegs_PPC32VI5s");
+ }
+}
+
+
/* --------- Instructions. --------- */
=20
HChar* showPPC32UnaryOp ( PPC32UnaryOp op ) {
@@ -942,7 +1000,7 @@
i->Pin.AvShlDbl.srcR =3D srcR;
return i;
}
-PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32RI* src ) {
+PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32VI5s* src ) {
PPC32Instr* i =3D LibVEX_Alloc(sizeof(PPC32Instr));
i->tag =3D Pin_AvSplat;
i->Pin.AvSplat.sz =3D sz;
@@ -1355,20 +1413,15 @@
return;
=20
case Pin_AvSplat: {
- UChar ch_sz =3D toUChar(
- (i->Pin.AvSplat.sz =3D=3D 8) ? 'b' :
- (i->Pin.AvSplat.sz =3D=3D 16) ? 'h' : 'w'
- );
+ UChar sz =3D i->Pin.AvSplat.sz;
+ UChar ch_sz =3D toUChar( (sz =3D=3D 8) ? 'b' : (sz =3D=3D 16) ? 'h=
' : 'w' );
vex_printf("vsplt%s%c ",
- i->Pin.AvSplat.src->tag =3D=3D Pri_Imm ? "is" : "", ch_=
sz);
+ i->Pin.AvSplat.src->tag =3D=3D Pvi_Imm ? "is" : "", ch_=
sz);
ppHRegPPC32(i->Pin.AvSplat.dst);
vex_printf(",");
- if (i->Pin.AvSplat.src->tag =3D=3D Pri_Imm) {
- vex_printf("%d", (Char)(i->Pin.AvSplat.src->Pri.Imm));
- } else {
- ppHRegPPC32(i->Pin.AvSplat.src->Pri.Reg);
- vex_printf(", 0");
- }
+ ppPPC32VI5s(i->Pin.AvSplat.src);
+ if (i->Pin.AvSplat.src->tag =3D=3D Pvi_Reg)
+ vex_printf(", %u", (128/sz)-1); /* lowest lane */
return;
}
=20
@@ -1599,8 +1652,8 @@
addHRegUse(u, HRmRead, i->Pin.AvShlDbl.srcR);
return;
case Pin_AvSplat:
- addHRegUse(u, HRmWrite, i->Pin.AvSplat.dst);
- addRegUsage_PPC32RI(u, i->Pin.AvSplat.src);
+ addHRegUse(u, HRmWrite, i->Pin.AvSplat.dst);
+ addRegUsage_PPC32VI5s(u, i->Pin.AvSplat.src);
return;
case Pin_AvCMov:
addHRegUse(u, HRmModify, i->Pin.AvCMov.dst);
@@ -1764,7 +1817,7 @@
return;
case Pin_AvSplat:
mapReg(m, &i->Pin.AvSplat.dst);
- mapRegs_PPC32RI(m, i->Pin.AvSplat.src);
+ mapRegs_PPC32VI5s(m, i->Pin.AvSplat.src);
return;
case Pin_AvCMov:
mapReg(m, &i->Pin.AvCMov.dst);
@@ -2812,16 +2865,21 @@
UInt v_srcL =3D vregNo(i->Pin.AvBinary.srcL);
UInt v_srcR =3D vregNo(i->Pin.AvBinary.srcR);
UInt opc2;
+ if (i->Pin.AvBinary.op =3D=3D Pav_SHL) {
+ p =3D mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1036 ); // vslo
+ p =3D mkFormVX( p, 4, v_dst, v_dst, v_srcR, 452 ); // vsl
+ goto done;
+ }
+ if (i->Pin.AvBinary.op =3D=3D Pav_SHR) {
+ p =3D mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1100 ); // vsro
+ p =3D mkFormVX( p, 4, v_dst, v_dst, v_srcR, 708 ); // vsr
+ goto done;
+ }
switch (i->Pin.AvBinary.op) {
/* Bitwise */
case Pav_AND: opc2 =3D 1028; break; // vand
case Pav_OR: opc2 =3D 1156; break; // vor
case Pav_XOR: opc2 =3D 1220; break; // vxor
-
- /* Shift */
- case Pav_SHL: opc2 =3D 452; break; // vsl
- case Pav_SHR: opc2 =3D 708; break; // vsr
-
default:
goto bad;
}
@@ -3060,17 +3118,22 @@
case Pin_AvSplat: { // vsplt(is)(b,h,w)
UInt v_dst =3D vregNo(i->Pin.AvShlDbl.dst);
UChar sz =3D i->Pin.AvSplat.sz;
- UInt v_src, simm_src, opc2;
+ UInt v_src, opc2;
vassert(sz =3D=3D 8 || sz =3D=3D 16 || sz =3D=3D 32);
=20
- if (i->Pin.AvSplat.src->tag =3D=3D Pri_Imm) {
+ if (i->Pin.AvSplat.src->tag =3D=3D Pvi_Imm) {
opc2 =3D (sz =3D=3D 8) ? 780 : (sz =3D=3D 16) ? 844 : 908; //=
8,16,32
- simm_src =3D i->Pin.AvSplat.src->Pri.Imm;
- p =3D mkFormVX( p, 4, v_dst, simm_src, 0, opc2 );
- } else { // Pri_Reg
+ /* expects 5-bit-signed-imm */
+ Char simm5 =3D i->Pin.AvSplat.src->Pvi.Imm5s;
+ vassert(simm5 >=3D -16 && simm5 <=3D 15);
+ p =3D mkFormVX( p, 4, v_dst, (UInt)simm5, 0, opc2 );
+ }
+ else { // Pvi_Reg
opc2 =3D (sz =3D=3D 8) ? 524 : (sz =3D=3D 16) ? 588 : 652; // =
8,16,32
- v_src =3D iregNo(i->Pin.AvSplat.src->Pri.Reg);
- p =3D mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
+ vassert(hregClass(i->Pin.AvSplat.src->Pvi.Reg) =3D=3D HRcVec128=
);
+ v_src =3D vregNo(i->Pin.AvSplat.src->Pvi.Reg);
+ UInt lowest_lane =3D (128/sz)-1;
+ p =3D mkFormVX( p, 4, v_dst, lowest_lane, v_src, opc2 );
}
goto done;
}
Modified: trunk/priv/host-ppc32/hdefs.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-ppc32/hdefs.h 2005-09-13 18:41:09 UTC (rev 1391)
+++ trunk/priv/host-ppc32/hdefs.h 2005-09-14 20:35:47 UTC (rev 1392)
@@ -293,6 +293,32 @@
extern void ppPPC32RI ( PPC32RI* );
=20
=20
+/* --------- Operand, which can be a vector reg or a simm5. --------- */
+/* ("VI" =3D=3D "Vector Register or Immediate") */
+typedef
+ enum {
+ Pvi_Imm=3D5,
+ Pvi_Reg=3D6
+ }=20
+ PPC32VI5sTag;
+
+typedef
+ struct {
+ PPC32VI5sTag tag;
+ union {
+ Char Imm5s;
+ HReg Reg;
+ }
+ Pvi;
+ }
+ PPC32VI5s;
+
+extern PPC32VI5s* PPC32VI5s_Imm ( Char );
+extern PPC32VI5s* PPC32VI5s_Reg ( HReg );
+
+extern void ppPPC32VI5s ( PPC32VI5s* );
+
+
/* --------- Instructions. --------- */
=20
/* --------- */
@@ -664,7 +690,7 @@
struct {
UChar sz; /* 8,16,32 */
HReg dst;
- PPC32RI* src;
+ PPC32VI5s* src;=20
} AvSplat;
/* Mov src to dst on the given condition, which may not
be the bogus Xcc_ALWAYS. */
@@ -719,7 +745,7 @@
extern PPC32Instr* PPC32Instr_AvPerm ( HReg ctl, HReg dst, HReg srcL=
, HReg srcR );
extern PPC32Instr* PPC32Instr_AvSel ( HReg ctl, HReg dst, HReg srcL=
, HReg srcR );
extern PPC32Instr* PPC32Instr_AvShlDbl ( UChar shift, HReg dst, HReg s=
rcL, HReg srcR );
-extern PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32RI* =
src );
+extern PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32VI5s=
* src );
extern PPC32Instr* PPC32Instr_AvCMov ( PPC32CondCode, HReg dst, HReg=
src );
extern PPC32Instr* PPC32Instr_AvLdVSCR ( HReg src );
=20
Modified: trunk/priv/host-ppc32/isel.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-ppc32/isel.c 2005-09-13 18:41:09 UTC (rev 1391)
+++ trunk/priv/host-ppc32/isel.c 2005-09-14 20:35:47 UTC (rev 1392)
@@ -752,7 +752,86 @@
//.. add_to_esp(env, 8);
//.. }
=20
+/*
+ Generates code for AvSplat
+ - takes in IRExpr* of type 8|16|32
+ returns vector reg of duplicated lanes of input
+ - uses AvSplat(imm) for imms up to simm6.
+ otherwise must use store reg & load vector
+*/
+static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e )
+{
+ HReg dst =3D newVRegV(env);
+ PPC32RI* ri =3D iselIntExpr_RI(env, e);
+ IRType ty =3D typeOfIRExpr(env->type_env,e);
+ UInt sz =3D (ty =3D=3D Ity_I8) ? 8 : (ty =3D=3D Ity_I16) ? 16 : =
32;
+ vassert(ty =3D=3D Ity_I8 || ty =3D=3D Ity_I16 || ty =3D=3D Ity_I32);
=20
+ HReg r_src;
+ /* special case: immediate */
+ if (ri->tag =3D=3D Pri_Imm) {
+ Int simm32 =3D (Int)ri->Pri.Imm;
+
+ /* figure out if it's do-able with imm splats. */
+ if (simm32 >=3D -32 && simm32 <=3D 31) {
+ Char simm6 =3D (Char)simm32;
+ if (simm6 > 15) { /* 16:31 inclusive */
+ HReg v1 =3D newVRegV(env);
+ HReg v2 =3D newVRegV(env);
+ addInstr(env, PPC32Instr_AvSplat(sz, v1, PPC32VI5s_Imm(-16))=
);
+ addInstr(env, PPC32Instr_AvSplat(sz, v2, PPC32VI5s_Imm(simm6=
-16)));
+ addInstr(env, PPC32Instr_AvBinary(Pav_SUBUM, dst, v2, v1));
+ return dst;
+ }
+ if (simm6 < -16) { /* -32:-17 inclusive */
+ HReg v1 =3D newVRegV(env);
+ HReg v2 =3D newVRegV(env);
+ addInstr(env, PPC32Instr_AvSplat(sz, v1, PPC32VI5s_Imm(-16))=
);
+ addInstr(env, PPC32Instr_AvSplat(sz, v2, PPC32VI5s_Imm(simm6=
+16)));
+ addInstr(env, PPC32Instr_AvBinary(Pav_ADDUM, dst, v2, v1));
+ return dst;
+ }
+ /* simplest form: -16:15 inclusive */
+ addInstr(env, PPC32Instr_AvSplat(sz, dst, PPC32VI5s_Imm(simm6))=
);
+ return dst;
+ }
+
+ /* no luck; use the Slow way. */
+ r_src =3D newVRegI(env);
+ addInstr(env, PPC32Instr_LI32(r_src, (UInt)simm32));
+ }
+ else {
+ r_src =3D ri->Pri.Reg;
+ }
+
+ /* default case: store r_src in lowest lane of 16-aligned mem,
+ load vector, splat lowest lane to dst */
+ {
+ /* CAB: Perhaps faster to store r_src multiple times (sz dependent=
),
+ and simply load the vector? */
+
+ HReg v_src =3D newVRegV(env);
+ PPC32AMode *am_off12;
+
+ sub_from_sp( env, 32 ); // Move SP down
+ /* Get a 16-aligned address within our stack space */
+ HReg r_aligned16 =3D get_sp_aligned16( env );
+ am_off12 =3D PPC32AMode_IR( 12, r_aligned16);
+
+ /* Store r_src in low word of 16-aligned mem */
+ addInstr(env, PPC32Instr_Store( 4, am_off12, r_src ));
+
+ /* Load src to vector[low lane] */
+ addInstr(env, PPC32Instr_AvLdSt( True/*load*/, 4, v_src, am_off12 =
));
+ add_to_sp( env, 32 ); // Reset SP
+
+ /* Finally, splat v_src[low_lane] to dst */
+ addInstr(env, PPC32Instr_AvSplat(sz, dst, PPC32VI5s_Reg(v_src)));
+ return dst;
+ }
+}
+
+
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
/*---------------------------------------------------------*/
@@ -1498,13 +1577,15 @@
static PPC32RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
{
IRType ty =3D typeOfIRExpr(env->type_env,e);
- vassert(ty =3D=3D Ity_I32);
+ vassert(ty =3D=3D Ity_I8 || ty =3D=3D Ity_I16 || ty =3D=3D Ity_I32);
=20
/* special case: immediate */
if (e->tag =3D=3D Iex_Const) {
UInt u;
switch (e->Iex.Const.con->tag) {
- case Ico_U32: u =3D e->Iex.Const.con->Ico.U32; break;
+ case Ico_U32: u =3D e->Iex.Const.con->Ico.U32; break;
+ case Ico_U16: u =3D 0xFFFF & e->Iex.Const.con->Ico.U16; break;
+ case Ico_U8: u =3D 0xFF & e->Iex.Const.con->Ico.U8; break;
default: vpanic("iselIntExpr_RI.Iex_Const(ppc32h)");
}
return PPC32RI_Imm(u);
@@ -3040,6 +3121,11 @@
//.. return dst;
//.. }
=20
+ case Iop_Dup32x4: {
+ HReg dst =3D mk_AvDuplicateRI(env, e->Iex.Binop.arg1);
+ return dst;
+ }
+
default:
break;
} /* switch (e->Iex.Unop.op) */
@@ -3245,7 +3331,7 @@
do_AvBin: {
HReg arg1 =3D iselVecExpr(env, e->Iex.Binop.arg1);
HReg arg2 =3D iselVecExpr(env, e->Iex.Binop.arg2);
- HReg dst =3D newVRegV(env);
+ HReg dst =3D newVRegV(env);
addInstr(env, PPC32Instr_AvBinary(op, dst, arg1, arg2));
return dst;
}
@@ -3273,6 +3359,16 @@
//.. case Iop_ShrN16x8: op =3D Xsse_SHR16; goto do_SseShift;
//.. case Iop_ShrN32x4: op =3D Xsse_SHR32; goto do_SseShift;
//.. case Iop_ShrN64x2: op =3D Xsse_SHR64; goto do_SseShift;
+
+ case Iop_ShrV128: op =3D Pav_SHR; goto do_AvShiftV128;
+ do_AvShiftV128: {
+ HReg dst =3D newVRegV(env);
+ HReg r_src =3D iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg v_shft =3D mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+ addInstr(env, PPC32Instr_AvBinary(op, dst, r_src, v_shft));
+ return dst;
+ }
+
//.. do_SseShift: {
//.. HReg greg =3D iselVecExpr(env, e->Iex.Binop.arg1);
//.. X86RMI* rmi =3D iselIntExpr_RMI(env, e->Iex.Binop.arg2)=
;
|