From: <sv...@va...> - 2014-03-07 22:52:27
Author: sewardj
Date: Fri Mar 7 22:52:19 2014
New Revision: 2833
Log:
Support extra instruction bits and pieces, enough to get Firefox started:
* more scalar int <-> FP conversions
* more vector integer narrowing
* a few more vector shift by imm cases
* FCVTAS (kludged)
Modified:
trunk/priv/guest_arm64_toIR.c
trunk/priv/host_arm64_defs.c
trunk/priv/host_arm64_defs.h
trunk/priv/host_arm64_isel.c
Modified: trunk/priv/guest_arm64_toIR.c
==============================================================================
--- trunk/priv/guest_arm64_toIR.c (original)
+++ trunk/priv/guest_arm64_toIR.c Fri Mar 7 22:52:19 2014
@@ -5401,8 +5401,13 @@
// A bit of ATCery: bounce all cases we haven't seen an example of.
if (/* F32toI32S */
(op == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
+ || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
+ || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
/* F32toI32U */
+ || (op == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
+ || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
/* F32toI64S */
+ || (op == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
/* F32toI64U */
|| (op == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
/* F64toI32S */
@@ -5410,12 +5415,16 @@
|| (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
|| (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
/* F64toI32U */
- || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
|| (op == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
+ || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
+ || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
/* F64toI64S */
|| (op == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
+ || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
+ || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
/* F64toI64U */
|| (op == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
+ || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
) {
/* validated */
} else {
@@ -5433,6 +5442,37 @@
return True;
}
+ /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */
+ /* 30 23 20 18 15 9 4
+ 1 00 11110 0x 1 00 100 000000 n d FCVTAS Xd, Fn
+ 0 00 11110 0x 1 00 100 000000 n d FCVTAS Wd, Fn
+ Fn is Dn when x==1, Sn when x==0
+ */
+ if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0)
+ && INSN(21,16) == BITS6(1,0,0,1,0,0)
+ && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
+ Bool isI64 = INSN(31,31) == 1;
+ Bool isF64 = INSN(22,22) == 1;
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+ /* Decide on the IR rounding mode to use. */
+ /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */
+ IRRoundingMode irrm = Irrm_NEAREST;
+ /* Decide on the conversion primop. */
+ IROp op = isI64 ? (isF64 ? Iop_F64toI64S : Iop_F32toI64S)
+ : (isF64 ? Iop_F64toI32S : Iop_F32toI32S);
+ IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
+ IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
+ IRTemp src = newTemp(srcTy);
+ IRTemp dst = newTemp(dstTy);
+ assign(src, getQRegLO(nn, srcTy));
+ assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
+ putIRegOrZR(isI64, dd, mkexpr(dst));
+ DIP("fcvtas %s, %s (KLUDGED)\n",
+ nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
+ return True;
+ }
+
/* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
/* 31 23 21 17 14 9 4
000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
@@ -5444,7 +5484,7 @@
010 -inf (FRINTM)
011 zero (FRINTZ)
000 tieeven
- 100 tieaway
+ 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
110 per FPCR + "exact = TRUE"
101 unallocated
*/
@@ -5461,6 +5501,8 @@
case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
+ // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
+ case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
default: break;
}
if (irrmE) {
@@ -5483,9 +5525,9 @@
000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
--------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
--------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
- --------- 00 ----- 01 --------- FCVT Dd, Sn (unimp)
+ --------- 00 ----- 01 --------- FCVT Dd, Sn
--------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
- --------- 01 ----- 00 --------- FCVT Sd, Dn (unimp)
+ --------- 01 ----- 00 --------- FCVT Sd, Dn
Rounding, when dst is smaller than src, is per the FPCR.
*/
if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
Modified: trunk/priv/host_arm64_defs.c
==============================================================================
--- trunk/priv/host_arm64_defs.c (original)
+++ trunk/priv/host_arm64_defs.c Fri Mar 7 22:52:19 2014
@@ -905,6 +905,7 @@
{
switch (op) {
case ARM64vecsh_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
+ case ARM64vecsh_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
case ARM64vecsh_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
case ARM64vecsh_SHL32x4: *nm = "shl "; *ar = "4s"; return;
default: vpanic("showARM64VecShiftImmOp");
@@ -1620,6 +1621,8 @@
maxSh = 63; break;
case ARM64vecsh_SHL32x4:
maxSh = 31; break;
+ case ARM64vecsh_USHR16x8:
+ maxSh = 15; break;
default:
vassert(0);
}
@@ -3351,6 +3354,7 @@
#define X111110 BITS8(0,0, 1,1,1,1,1,0)
#define X111111 BITS8(0,0, 1,1,1,1,1,1)
+#define X0010000 BITS8(0, 0,0,1,0,0,0,0)
#define X0100000 BITS8(0, 0,1,0,0,0,0,0)
#define X1000000 BITS8(0, 1,0,0,0,0,0,0)
@@ -4679,7 +4683,9 @@
case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
*p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
break;
- /* UCVTF Sd, Wn ATC */
+ case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
+ break;
case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
*p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
break;
@@ -4728,12 +4734,18 @@
*p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
X000000, rN, rD);
break;
- /* */
case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
*p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
X000000, rN, rD);
break;
- /* */
+ case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
+ X000000, rN, rD);
+ break;
+ case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
+ X000000, rN, rD);
+ break;
case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
*p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
X000000, rN, rD);
@@ -5130,6 +5142,15 @@
goto done;
}
break;
+ //case ARM64vecsh_SSHR16x8: syned = True; ATC
+ case ARM64vecsh_USHR16x8: /* fallthrough */
+ if (sh >= 1 && sh <= 15) {
+ UInt xxxx = 16-sh;
+ *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
+ X0010000 | xxxx, X000001, vN, vD);
+ goto done;
+ }
+ break;
default:
break;
}
Modified: trunk/priv/host_arm64_defs.h
==============================================================================
--- trunk/priv/host_arm64_defs.h (original)
+++ trunk/priv/host_arm64_defs.h Fri Mar 7 22:52:19 2014
@@ -357,6 +357,7 @@
typedef
enum {
ARM64vecsh_USHR64x2=350,
+ ARM64vecsh_USHR16x8,
ARM64vecsh_SSHR64x2,
ARM64vecsh_SHL32x4,
ARM64vecsh_INVALID
Modified: trunk/priv/host_arm64_isel.c
==============================================================================
--- trunk/priv/host_arm64_isel.c (original)
+++ trunk/priv/host_arm64_isel.c Fri Mar 7 22:52:19 2014
@@ -1812,6 +1812,10 @@
cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
case Iop_F32toI32S:
cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
+ case Iop_F32toI32U:
+ cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
+ case Iop_F32toI64S:
+ cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
case Iop_F32toI64U:
cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
default:
@@ -2118,6 +2122,7 @@
ARM64sh_SAR));
return dst;
}
+ case Iop_NarrowUn16to8x8:
case Iop_NarrowUn32to16x4:
case Iop_NarrowUn64to32x2: {
HReg src = iselV128Expr(env, e->Iex.Unop.arg);
@@ -2125,6 +2130,7 @@
HReg dst = newVRegI(env);
UInt dszBlg2 = 3; /* illegal */
switch (e->Iex.Unop.op) {
+ case Iop_NarrowUn16to8x8: dszBlg2 = 0; break; // 16to8_x8
case Iop_NarrowUn32to16x4: dszBlg2 = 1; break; // 32to16_x4
case Iop_NarrowUn64to32x2: dszBlg2 = 2; break; // 64to32_x2
default: vassert(0);
@@ -5425,8 +5431,10 @@
//ZZ case Iop_ShrN16x8:
//ZZ case Iop_ShrN32x4:
case Iop_ShrN64x2:
+ case Iop_ShrN16x8:
case Iop_SarN64x2:
- case Iop_ShlN32x4: {
+ case Iop_ShlN32x4:
+ {
IRExpr* argL = e->Iex.Binop.arg1;
IRExpr* argR = e->Iex.Binop.arg2;
if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
@@ -5436,6 +5444,8 @@
switch (e->Iex.Binop.op) {
case Iop_ShrN64x2:
op = ARM64vecsh_USHR64x2; limit = 63; break;
+ case Iop_ShrN16x8:
+ op = ARM64vecsh_USHR16x8; limit = 15; break;
case Iop_SarN64x2:
op = ARM64vecsh_SSHR64x2; limit = 63; break;
case Iop_ShlN32x4:
@@ -6167,11 +6177,13 @@
addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
return dstS;
}
+ case Iop_I32UtoF32:
case Iop_I32StoF32:
case Iop_I64UtoF32:
case Iop_I64StoF32: {
ARM64CvtOp cvt_op = ARM64cvt_INVALID;
switch (e->Iex.Binop.op) {
+ case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;