|
From: <sv...@va...> - 2016-08-07 23:33:56
|
Author: sewardj
Date: Mon Aug 8 00:33:48 2016
New Revision: 3242
Log:
Implement VMULL.P64.
dis_neon_data_3diff: don't mistakenly recognise VMULL.P64 as a plain
VMUL due to inadequate checking for the VMULL.P64 case.
Fix ARM decoding of SHA1SU1, SHA256SU0, SHA1H introduced in r3241.
Modified:
trunk/priv/guest_arm_defs.h
trunk/priv/guest_arm_helpers.c
trunk/priv/guest_arm_toIR.c
Modified: trunk/priv/guest_arm_defs.h
==============================================================================
--- trunk/priv/guest_arm_defs.h (original)
+++ trunk/priv/guest_arm_defs.h Mon Aug 8 00:33:48 2016
@@ -217,6 +217,12 @@
UInt argM3, UInt argM2, UInt argM1, UInt argM0
);
+extern
+void armg_dirtyhelper_VMULLP64 (
+ /*OUT*/V128* res,
+ UInt argN1, UInt argN0, UInt argM1, UInt argM0
+ ); /* argN1:argN0 and argM1:argM0 are the high:low 32-bit halves of
+       the two 64-bit source operands; the result goes to |res|. */
+
/*---------------------------------------------------------*/
/*--- Condition code stuff ---*/
Modified: trunk/priv/guest_arm_helpers.c
==============================================================================
--- trunk/priv/guest_arm_helpers.c (original)
+++ trunk/priv/guest_arm_helpers.c Mon Aug 8 00:33:48 2016
@@ -780,6 +780,18 @@
arm64g_dirtyhelper_SHA1H(res, argMhi, argMlo);
}
+/* CALLED FROM GENERATED CODE.
+   Helper used by the 32-bit ARM front end for VMULL.P64.  Each 64-bit
+   source operand is passed as two UInt halves, high half first
+   (argN1:argN0 and argM1:argM0).  The halves are reassembled into
+   ULongs and the work is delegated to arm64g_dirtyhelper_PMULLQ,
+   which receives |res| as its output location. */
+void armg_dirtyhelper_VMULLP64 (
+ /*OUT*/V128* res,
+ UInt argN1, UInt argN0, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1))); /* res must be 8-byte aligned */
+ ULong argN = (((ULong)argN1) << 32) | ((ULong)argN0); /* N = hi:lo */
+ ULong argM = (((ULong)argM1) << 32) | ((ULong)argM0); /* M = hi:lo */
+ arm64g_dirtyhelper_PMULLQ(res, argN, argM);
+}
+
/*---------------------------------------------------------------*/
/*--- Flag-helpers translation-time function specialisers. ---*/
Modified: trunk/priv/guest_arm_toIR.c
==============================================================================
--- trunk/priv/guest_arm_toIR.c (original)
+++ trunk/priv/guest_arm_toIR.c Mon Aug 8 00:33:48 2016
@@ -3041,6 +3041,12 @@
static
Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
{
+ /* In paths where this returns False, indicating a non-decodable
+ instruction, there may still be some IR assignments to temporaries
+ generated. This is inconvenient but harmless, and the post-front-end
+ IR optimisation pass will just remove them anyway. So there's no
+ effort made here to tidy it up.
+ */
UInt Q = (theInstr >> 6) & 1;
UInt dreg = get_neon_d_regno(theInstr);
UInt nreg = get_neon_n_regno(theInstr);
@@ -4834,6 +4840,12 @@
static
Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
{
+ /* In paths where this returns False, indicating a non-decodable
+ instruction, there may still be some IR assignments to temporaries
+ generated. This is inconvenient but harmless, and the post-front-end
+ IR optimisation pass will just remove them anyway. So there's no
+ effort made here to tidy it up.
+ */
UInt A = (theInstr >> 8) & 0xf;
UInt B = (theInstr >> 20) & 3;
UInt U = (theInstr >> 24) & 1;
@@ -5191,11 +5203,15 @@
op = Iop_PolynomialMull8x8;
break;
case 1:
+ if (P) return False;
op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
break;
case 2:
+ if (P) return False;
op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
break;
+ case 3:
+ return False;
default:
vassert(0);
}
@@ -12928,7 +12944,7 @@
{
Bool gate = False;
- UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,0);
+ UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
if (INSNA(31,23) == hi9 && INSNA(21,16) == BITS6(1,1,1,0,1,0)
&& INSNA(11,7) == BITS5(0,0,1,1,1) && INSNA(4,4) == 0) {
gate = True;
@@ -12998,7 +13014,7 @@
{
Bool gate = False;
- UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,0);
+ UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
if (INSNA(31,23) == hi9 && INSNA(21,16) == BITS6(1,1,1,0,0,1)
&& INSNA(11,6) == BITS6(0,0,1,0,1,1) && INSNA(4,4) == 0) {
gate = True;
@@ -13043,6 +13059,64 @@
/* fall through */
}
+ /* ----------- VMULL.P64 ----------- */
+ /*
+ 31 27 23 21 19 15 11 7 3
+ T2: 1110 1111 1 D 10 n d 1110 N 0 M 0 m
+ A2: 1111 0010 -------------------------
+
+ The ARM documentation is pretty difficult to follow here.
+ Same comments about conditionalisation as for the AES group above apply.
+
+ Operands: the destination is a Q register (regD >> 1), so an odd
+ D-register number for the destination is not decoded.  The two
+ sources are 64-bit D registers (regN, regM).  The multiply itself is
+ done out of line by armg_dirtyhelper_VMULLP64, which takes each
+ 64-bit source as a (high, low) pair of 32-bit values and returns a
+ V128 result.
+ */
+ {
+ Bool gate = False;
+
+ UInt hi9 = isT ? BITS9(1,1,1,0,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,0,1);
+ if (INSNA(31,23) == hi9 && INSNA(21,20) == BITS2(1,0)
+ && INSNA(11,8) == BITS4(1,1,1,0)
+ && INSNA(6,6) == 0 && INSNA(4,4) == 0) {
+ gate = True;
+ }
+
+ UInt regN = (INSNA(7,7) << 4) | INSNA(19,16); /* N:n */
+ UInt regD = (INSNA(22,22) << 4) | INSNA(15,12); /* D:d */
+ UInt regM = (INSNA(5,5) << 4) | INSNA(3,0); /* M:m */
+
+ if ((regD & 1) == 1) /* destination must name a whole Q register */
+ gate = False;
+
+ if (gate) {
+ const HChar* iname = "vmull";
+ void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_VMULLP64;
+ const HChar* hname = "armg_dirtyhelper_VMULLP64";
+
+ if (isT) {
+ gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+ }
+
+ IRTemp srcN = newTemp(Ity_I64);
+ IRTemp srcM = newTemp(Ity_I64);
+ assign(srcN, getDRegI64(regN));
+ assign(srcM, getDRegI64(regM));
+
+ /* Helper argument order is N-high, N-low, M-high, M-low. */
+ IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
+ unop(Iop_64HIto32, mkexpr(srcN)),
+ unop(Iop_64to32, mkexpr(srcN)),
+ unop(Iop_64HIto32, mkexpr(srcM)),
+ unop(Iop_64to32, mkexpr(srcM)));
+
+ IRTemp res = newTemp(Ity_V128);
+ IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
+ hname, helper, argvec );
+ stmt(IRStmt_Dirty(di));
+ putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
+
+ /* Sources are D registers (see getDRegI64 above), so print them as
+ d<n>, not q<n>/w<n>. */
+ DIP("%s.p64 q%u, d%u, d%u\n", iname, regD >> 1, regN, regM);
+ return True;
+ }
+ /* fall through */
+ }
+
/* ---------- Doesn't match anything. ---------- */
return False;
|