|
From: <sv...@va...> - 2014-06-25 13:05:37
|
Author: sewardj
Date: Wed Jun 25 13:05:23 2014
New Revision: 2887
Log:
arm64: implement:
LD3/ST3 (multi 3-elem structs, 3 regs, post index) (2d variants only)
pmul 16b_16b_16b, 8b_8b_8b
Modified:
trunk/priv/guest_arm64_toIR.c
trunk/priv/host_arm64_defs.c
trunk/priv/host_arm64_defs.h
trunk/priv/host_arm64_isel.c
Modified: trunk/priv/guest_arm64_toIR.c
==============================================================================
--- trunk/priv/guest_arm64_toIR.c (original)
+++ trunk/priv/guest_arm64_toIR.c Wed Jun 25 13:05:23 2014
@@ -4373,7 +4373,7 @@
return False;
}
- /* ---------- LD2/ST2 (multiple structures, post index) ---------- */
+ /* -------- LD2/ST2 (multi 2-elem structs, 2 regs, post index) -------- */
/* Only a very few cases. */
/* 31 23 11 9 4
0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
@@ -4513,6 +4513,58 @@
return True;
}
+ /* -------- LD3/ST3 (multi 3-elem structs, 3 regs, post index) -------- */
+ /* Only a very few cases. */
+ /* 31 23 11 9 4
+ 0100 1100 1101 1111 0100 11 n t LD3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
+ 0100 1100 1001 1111 0100 11 n t ST3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
+ */
+ if ( (insn & 0xFFFFFC00) == 0x4CDF4C00 // LD3 .2d
+ || (insn & 0xFFFFFC00) == 0x4C9F4C00 // ST3 .2d
+ ) { /* Translate post-indexed LD3/ST3 of three consecutive .2d registers. */
+ Bool isLD = INSN(22,22) == 1; /* the two matched encodings differ only in bit 22 */
+ UInt rN = INSN(9,5);
+ UInt vT = INSN(4,0);
+ IRTemp tEA = newTemp(Ity_I64);
+ UInt sz = INSN(11,10); /* the masks above pin bits 11:10 to 11, so only .2d reaches here */
+ const HChar* name = "??";
+ assign(tEA, getIReg64orSP(rN)); /* capture the base address before the writeback below */
+ if (rN == 31) { /* FIXME generate stack alignment check */ }
+ IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
+ IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
+ IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
+ IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
+ IRExpr* tEA_32 = binop(Iop_Add64, mkexpr(tEA), mkU64(32));
+ IRExpr* tEA_40 = binop(Iop_Add64, mkexpr(tEA), mkU64(40));
+ if (sz == BITS2(1,1)) {
+ name = "2d";
+ if (isLD) { /* de-interleave: the 64-bit word at offset 8*k goes to register k%3, lane k/3 */
+ putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
+ putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_24));
+ putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
+ putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_32));
+ putQRegLane((vT+2) % 32, 0, loadLE(Ity_I64, tEA_16));
+ putQRegLane((vT+2) % 32, 1, loadLE(Ity_I64, tEA_40));
+ } else { /* interleave: same offset-to-(register, lane) mapping as the load path */
+ storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
+ storeLE(tEA_24, getQRegLane((vT+0) % 32, 1, Ity_I64));
+ storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
+ storeLE(tEA_32, getQRegLane((vT+1) % 32, 1, Ity_I64));
+ storeLE(tEA_16, getQRegLane((vT+2) % 32, 0, Ity_I64));
+ storeLE(tEA_40, getQRegLane((vT+2) % 32, 1, Ity_I64));
+ }
+ }
+ else {
+ vassert(0); // Can't happen.
+ }
+ putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(48))); /* post-index writeback: 6 x 8 bytes */
+ DIP("%s {v%u.%s, v%u.%s, v%u.%s}, [%s], #48\n",
+ isLD ? "ld3" : "st3",
+ (vT+0) % 32, name, (vT+1) % 32, name, (vT+2) % 32, name,
+ nameIReg64orSP(rN));
+ return True;
+ }
+
/* ------------------ LD{,A}X{R,RH,RB} ------------------ */
/* ------------------ ST{,L}X{R,RH,RB} ------------------ */
/* 31 29 23 20 14 9 4
Modified: trunk/priv/host_arm64_defs.c
==============================================================================
--- trunk/priv/host_arm64_defs.c (original)
+++ trunk/priv/host_arm64_defs.c Wed Jun 25 13:05:23 2014
@@ -921,6 +921,7 @@
case ARM64vecb_ZIP232x4: *nm = "zip2"; *ar = "4s"; return;
case ARM64vecb_ZIP216x8: *nm = "zip2"; *ar = "8h"; return;
case ARM64vecb_ZIP28x16: *nm = "zip2"; *ar = "16b"; return;
+ case ARM64vecb_PMUL8x16: *nm = "pmul"; *ar = "16b"; return;
default: vpanic("showARM64VecBinOp");
}
}
@@ -5123,6 +5124,8 @@
010 01110 10 0 m 011110 n d ZIP2 Vd.4s, Vn.4s, Vm.4s
010 01110 01 0 m 011110 n d ZIP2 Vd.8h, Vn.8h, Vm.8h
010 01110 00 0 m 011110 n d ZIP2 Vd.16b, Vn.16b, Vm.16b
+
+ 011 01110 00 1 m 100111 n d PMUL Vd.16b, Vn.16b, Vm.16b
*/
UInt vD = qregNo(i->ARM64in.VBinV.dst);
UInt vN = qregNo(i->ARM64in.VBinV.argL);
@@ -5346,6 +5349,10 @@
*p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
break;
+ case ARM64vecb_PMUL8x16:
+ *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
+ break;
+
default:
goto bad;
}
Modified: trunk/priv/host_arm64_defs.h
==============================================================================
--- trunk/priv/host_arm64_defs.h (original)
+++ trunk/priv/host_arm64_defs.h Wed Jun 25 13:05:23 2014
@@ -344,6 +344,7 @@
ARM64vecb_ZIP132x4, ARM64vecb_ZIP116x8,
ARM64vecb_ZIP18x16, ARM64vecb_ZIP232x4,
ARM64vecb_ZIP216x8, ARM64vecb_ZIP28x16,
+ ARM64vecb_PMUL8x16,
ARM64vecb_INVALID
}
ARM64VecBinOp;
Modified: trunk/priv/host_arm64_isel.c
==============================================================================
--- trunk/priv/host_arm64_isel.c (original)
+++ trunk/priv/host_arm64_isel.c Wed Jun 25 13:05:23 2014
@@ -4986,6 +4986,7 @@
case Iop_InterleaveLO32x4:
case Iop_InterleaveLO16x8:
case Iop_InterleaveLO8x16:
+ case Iop_PolynomialMul8x16:
{
HReg res = newVRegV(env);
HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
@@ -5066,6 +5067,7 @@
break;
case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
break;
+ case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
default: vassert(0);
}
if (sw) {
|