You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
1
(1) |
2
(1) |
3
(5) |
4
(4) |
5
(7) |
6
(4) |
|
7
(3) |
8
(5) |
9
|
10
(5) |
11
|
12
(4) |
13
|
|
14
|
15
(3) |
16
(1) |
17
(2) |
18
|
19
(1) |
20
|
|
21
(1) |
22
|
23
|
24
|
25
|
26
(3) |
27
|
|
28
(2) |
29
(1) |
30
|
31
|
|
|
|
|
From: <sv...@va...> - 2016-08-03 11:55:46
|
Author: sewardj
Date: Wed Aug 3 12:55:33 2016
New Revision: 3236
Log:
* Add infrastructure for decoding (32-bit) ARMv8 instructions.
* Use this to implement AESE, AESD, AESMC, AESIMC.
Modified:
trunk/priv/guest_arm_defs.h
trunk/priv/guest_arm_helpers.c
trunk/priv/guest_arm_toIR.c
Modified: trunk/priv/guest_arm_defs.h
==============================================================================
--- trunk/priv/guest_arm_defs.h (original)
+++ trunk/priv/guest_arm_defs.h Wed Aug 3 12:55:33 2016
@@ -111,6 +111,28 @@
UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
UInt resR1, UInt resR2 );
+/* --- DIRTY HELPERS --- */
+
+extern
+void armg_dirtyhelper_AESE ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 );
+
+extern
+void armg_dirtyhelper_AESD ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 );
+
+extern
+void armg_dirtyhelper_AESMC ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 );
+
+extern
+void armg_dirtyhelper_AESIMC ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 );
+
/*---------------------------------------------------------*/
/*--- Condition code stuff ---*/
Modified: trunk/priv/guest_arm_helpers.c
==============================================================================
--- trunk/priv/guest_arm_helpers.c (original)
+++ trunk/priv/guest_arm_helpers.c Wed Aug 3 12:55:33 2016
@@ -38,6 +38,7 @@
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm_defs.h"
+#include "guest_arm64_defs.h" /* for crypto helper functions */
/* This file contains helper functions for arm guest code. Calls to
@@ -535,6 +536,68 @@
/*---------------------------------------------------------------*/
+/*--- Crypto instruction helpers ---*/
+/*---------------------------------------------------------------*/
+
+/* DIRTY HELPERS for doing AES support:
+ * AESE (SubBytes, then ShiftRows)
+ * AESD (InvShiftRows, then InvSubBytes)
+ * AESMC (MixColumns)
+ * AESIMC (InvMixColumns)
+ These don't actually have to be dirty helpers -- they could be
+ clean, but for the fact that they return a V128 and a clean helper
+ can't do that.
+
+ These just call onwards to the implementations of the same in
+ guest_arm64_helpers.c. In all of these cases, we expect |res| to
+ be at least 8 aligned.
+*/
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_AESE ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 )
+{
+ vassert(0 == (((HWord)res) & (8-1)));
+ ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
+ ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+ arm64g_dirtyhelper_AESE(res, argHi, argLo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_AESD ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 )
+{
+ vassert(0 == (((HWord)res) & (8-1)));
+ ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
+ ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+ arm64g_dirtyhelper_AESD(res, argHi, argLo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_AESMC ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 )
+{
+ vassert(0 == (((HWord)res) & (8-1)));
+ ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
+ ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+ arm64g_dirtyhelper_AESMC(res, argHi, argLo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_AESIMC ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 )
+{
+ vassert(0 == (((HWord)res) & (8-1)));
+ ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
+ ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+ arm64g_dirtyhelper_AESIMC(res, argHi, argLo);
+}
+
+
+/*---------------------------------------------------------------*/
/*--- Flag-helpers translation-time function specialisers. ---*/
/*--- These help iropt specialise calls the above run-time ---*/
/*--- flags functions. ---*/
Modified: trunk/priv/guest_arm_toIR.c
==============================================================================
--- trunk/priv/guest_arm_toIR.c (original)
+++ trunk/priv/guest_arm_toIR.c Wed Aug 3 12:55:33 2016
@@ -8837,8 +8837,11 @@
Finally, the caller must indicate whether this occurs in ARM or in
Thumb code.
+
+ This only handles NEON for ARMv7 and below. The NEON extensions
+ for v8 are handled by decode_V8_instruction.
*/
-static Bool decode_NEON_instruction (
+static Bool decode_NEON_instruction_ARMv7_and_below (
/*MOD*/DisResult* dres,
UInt insn32,
IRTemp condT,
@@ -8915,7 +8918,7 @@
Caller must supply an IRTemp 'condT' holding the gating condition,
or IRTemp_INVALID indicating the insn is always executed.
- Caller must also supply an ARMCondcode 'cond'. This is only used
+ Caller must also supply an ARMCondcode 'conq'. This is only used
for debug printing, no other purpose. For ARM, this is simply the
top 4 bits of the original instruction. For Thumb, the condition
is not (really) known until run time, and so ARMCondAL should be
@@ -12581,6 +12584,198 @@
/*------------------------------------------------------------*/
+/*--- V8 instructions ---*/
+/*------------------------------------------------------------*/
+
+/* Break a V128-bit value up into four 32-bit ints. */
+
+static void breakupV128to32s ( IRTemp t128,
+ /*OUTs*/
+ IRTemp* t3, IRTemp* t2,
+ IRTemp* t1, IRTemp* t0 )
+{
+ IRTemp hi64 = newTemp(Ity_I64);
+ IRTemp lo64 = newTemp(Ity_I64);
+ assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
+ assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
+
+ vassert(t0 && *t0 == IRTemp_INVALID);
+ vassert(t1 && *t1 == IRTemp_INVALID);
+ vassert(t2 && *t2 == IRTemp_INVALID);
+ vassert(t3 && *t3 == IRTemp_INVALID);
+
+ *t0 = newTemp(Ity_I32);
+ *t1 = newTemp(Ity_I32);
+ *t2 = newTemp(Ity_I32);
+ *t3 = newTemp(Ity_I32);
+ assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
+ assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
+ assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
+ assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
+}
+
+
+/* Both ARM and Thumb */
+
+/* Translate a V8 instruction. If successful, returns True and *dres
+ may or may not be updated. If unsuccessful, returns False and
+ doesn't change *dres nor create any IR.
+
+ The Thumb and ARM encodings are potentially different. In both
+ ARM and Thumb mode, the caller must pass the entire 32 bits of
+ the instruction. Callers may pass any instruction; this function
+ ignores anything it doesn't recognise.
+
+ Caller must supply an IRTemp 'condT' holding the gating condition,
+ or IRTemp_INVALID indicating the insn is always executed.
+
+ If we are decoding an ARM instruction which is in the NV space
+ then it is expected that condT will be IRTemp_INVALID, and that is
+ asserted for. That condition is ensured by the logic near the top
+ of disInstr_ARM_WRK, that sets up condT.
+
+ When decoding for Thumb, the caller must pass the ITState pre/post
+ this instruction, so that we can generate a SIGILL in the cases where
+ the instruction may not be in an IT block. When decoding for ARM,
+ both of these must be IRTemp_INVALID.
+
+ Finally, the caller must indicate whether this occurs in ARM or in
+ Thumb code.
+*/
+static Bool decode_V8_instruction (
+ /*MOD*/DisResult* dres,
+ UInt insnv8,
+ IRTemp condT,
+ Bool isT,
+ IRTemp old_itstate,
+ IRTemp new_itstate
+ )
+{
+# define INSNA(_bMax,_bMin) SLICE_UInt(insnv8, (_bMax), (_bMin))
+# define INSNT0(_bMax,_bMin) SLICE_UInt( ((insnv8 >> 16) & 0xFFFF), \
+ (_bMax), (_bMin) )
+# define INSNT1(_bMax,_bMin) SLICE_UInt( ((insnv8 >> 0) & 0xFFFF), \
+ (_bMax), (_bMin) )
+ //HChar dis_buf[128];
+ //dis_buf[0] = 0;
+
+ if (isT) {
+ vassert(old_itstate != IRTemp_INVALID);
+ vassert(new_itstate != IRTemp_INVALID);
+ } else {
+ vassert(old_itstate == IRTemp_INVALID);
+ vassert(new_itstate == IRTemp_INVALID);
+ }
+
+ /* ARMCondcode 'conq' is only used for debug printing and for no other
+ purpose. For ARM, this is simply the top 4 bits of the instruction.
+ For Thumb, the condition is not (really) known until run time, and so
+ we set it to ARMCondAL in order that printing of these instructions
+ does not show any condition. */
+ ARMCondcode conq;
+ if (isT) {
+ conq = ARMCondAL;
+ } else {
+ conq = (ARMCondcode)INSNA(31,28);
+ if (conq == ARMCondNV || conq == ARMCondAL) {
+ vassert(condT == IRTemp_INVALID);
+ } else {
+ vassert(condT != IRTemp_INVALID);
+ }
+ vassert(conq >= ARMCondEQ && conq <= ARMCondNV);
+ }
+
+ /* ----------- AESD.8 q_q ----------- */
+ /* 31 27 23 21 19 17 15 11 7 3
+ T1: 1111 1111 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
+ A1: 1111 0011 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
+
+ T1: 1111 1111 1 D 11 sz 00 d 0011 01 M 0 m AESD Qd, Qm
+ A1: 1111 0011 1 D 11 sz 00 d 0011 01 M 0 m AESD Qd, Qm
+
+ T1: 1111 1111 1 D 11 sz 00 d 0011 10 M 0 m AESMC Qd, Qm
+ A1: 1111 0011 1 D 11 sz 00 d 0011 10 M 0 m AESMC Qd, Qm
+
+ T1: 1111 1111 1 D 11 sz 00 d 0011 11 M 0 m AESIMC Qd, Qm
+ A1: 1111 0011 1 D 11 sz 00 d 0011 11 M 0 m AESIMC Qd, Qm
+
+ sz must be 00
+ ARM encoding is in NV space
+ */
+ {
+ UInt regD = 99, regM = 99, opc = 4/*invalid*/;
+ Bool gate = True;
+
+ UInt high9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
+ if (INSNA(31,23) == high9 && INSNA(21,16) == BITS6(1,1,0,0,0,0)
+ && INSNA(11,8) == BITS4(0,0,1,1) && INSNA(4,4) == 0) {
+ UInt bitD = INSNA(22,22);
+ UInt fldD = INSNA(15,12);
+ UInt bitM = INSNA(5,5);
+ UInt fldM = INSNA(3,0);
+ opc = INSNA(7,6);
+ regD = (bitD << 4) | fldD;
+ regM = (bitM << 4) | fldM;
+ }
+ if ((regD & 1) == 1 || (regM & 1) == 1)
+ gate = False;
+
+ if (gate) {
+ if (isT) {
+ gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+ }
+ IRTemp op1 = newTemp(Ity_V128);
+ IRTemp op2 = newTemp(Ity_V128);
+ IRTemp src = newTemp(Ity_V128);
+ IRTemp res = newTemp(Ity_V128);
+ assign(op1, getQReg(regD >> 1));
+ assign(op2, getQReg(regM >> 1));
+ assign(src, opc == BITS2(0,0) || opc == BITS2(0,1)
+ ? binop(Iop_XorV128, mkexpr(op1), mkexpr(op2))
+ : mkexpr(op2));
+
+ void* helpers[4]
+ = { &armg_dirtyhelper_AESE, &armg_dirtyhelper_AESD,
+ &armg_dirtyhelper_AESMC, &armg_dirtyhelper_AESIMC };
+ const HChar* hNames[4]
+ = { "armg_dirtyhelper_AESE", "armg_dirtyhelper_AESD",
+ "armg_dirtyhelper_AESMC", "armg_dirtyhelper_AESIMC" };
+ const HChar* iNames[4]
+ = { "aese", "aesd", "aesmc", "aesimc" };
+
+ vassert(opc >= 0 && opc <= 3);
+ void* helper = helpers[opc];
+ const HChar* hname = hNames[opc];
+
+ IRTemp w32_3, w32_2, w32_1, w32_0;
+ w32_3 = w32_2 = w32_1 = w32_0 = IRTemp_INVALID;
+ breakupV128to32s( src, &w32_3, &w32_2, &w32_1, &w32_0 );
+
+ IRDirty* di
+ = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
+ mkIRExprVec_5(
+ IRExpr_VECRET(),
+ mkexpr(w32_3), mkexpr(w32_2),
+ mkexpr(w32_1), mkexpr(w32_0)) );
+ stmt(IRStmt_Dirty(di));
+
+ putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
+ DIP("%s.8 q%d, q%d\n", iNames[opc], regD >> 1, regM >> 1);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ---------- Doesn't match anything. ---------- */
+ return False;
+
+# undef INSNA
+# undef INSNT0
+# undef INSNT1
+}
+
+
+/*------------------------------------------------------------*/
/*--- LDMxx/STMxx helper (both ARM and Thumb32) ---*/
/*------------------------------------------------------------*/
@@ -14456,10 +14651,12 @@
*dres may or may not be updated. If failure, returns False and
doesn't change *dres nor create any IR.
- Note that all NEON instructions (in ARM mode) are handled through
- here, since they are all in NV space.
+ Note that all NEON instructions (in ARM mode) up to and including
+ ARMv7, but not later, are handled through here, since they are all
+ in NV space.
*/
-static Bool decode_NV_instruction ( /*MOD*/DisResult* dres,
+static Bool decode_NV_instruction_ARMv7_and_below
+ ( /*MOD*/DisResult* dres,
const VexArchInfo* archinfo,
UInt insn )
{
@@ -14585,7 +14782,7 @@
/* ------------------- NEON ------------------- */
if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
- Bool ok_neon = decode_NEON_instruction(
+ Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
dres, insn, IRTemp_INVALID/*unconditional*/,
False/*!isT*/
);
@@ -14627,16 +14824,10 @@
DisResult dres;
UInt insn;
- //Bool allow_VFP = False;
- //UInt hwcaps = archinfo->hwcaps;
IRTemp condT; /* :: Ity_I32 */
UInt summary;
HChar dis_buf[128]; // big enough to hold LDMIA etc text
- /* What insn variants are we supporting today? */
- //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
- // etc etc
-
/* Set result defaults. */
dres.whatNext = Dis_Continue;
dres.len = 4;
@@ -14751,11 +14942,12 @@
case ARMCondNV: {
// Illegal instruction prior to v5 (see ARM ARM A3-5), but
// some cases are acceptable
- Bool ok = decode_NV_instruction(&dres, archinfo, insn);
+ Bool ok
+ = decode_NV_instruction_ARMv7_and_below(&dres, archinfo, insn);
if (ok)
goto decode_success;
else
- goto decode_failure;
+ goto after_v7_decoder;
}
case ARMCondAL: // Always executed
break;
@@ -15685,7 +15877,7 @@
}
/* --- NB: ARM interworking branches are in NV space, hence
- are handled elsewhere by decode_NV_instruction.
+ are handled elsewhere by decode_NV_instruction_ARMv7_and_below.
---
*/
@@ -17341,7 +17533,8 @@
/* ----------------------------------------------------------- */
/* These are all in NV space, and so are taken care of (far) above,
- by a call from this function to decode_NV_instruction(). */
+ by a call from this function to
+ decode_NV_instruction_ARMv7_and_below(). */
/* ----------------------------------------------------------- */
/* -- v6 media instructions (in ARM mode) -- */
@@ -17356,6 +17549,24 @@
}
/* ----------------------------------------------------------- */
+ /* -- v8 instructions (in ARM mode) -- */
+ /* ----------------------------------------------------------- */
+
+ after_v7_decoder:
+
+ /* If we get here, it means that all attempts to decode the
+ instruction as ARMv7 or earlier have failed. So, if we're doing
+ ARMv8 or later, here is the point to try for it. */
+
+ if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
+ Bool ok_v8
+ = decode_V8_instruction( &dres, insn, condT, False/*!isT*/,
+ IRTemp_INVALID, IRTemp_INVALID );
+ if (ok_v8)
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
/* -- Undecodable -- */
/* ----------------------------------------------------------- */
@@ -17497,18 +17708,12 @@
DisResult dres;
UShort insn0; /* first 16 bits of the insn */
UShort insn1; /* second 16 bits of the insn */
- //Bool allow_VFP = False;
- //UInt hwcaps = archinfo->hwcaps;
HChar dis_buf[128]; // big enough to hold LDMIA etc text
/* Summary result of the ITxxx backwards analysis: False == safe
but suboptimal. */
Bool guaranteedUnconditional = False;
- /* What insn variants are we supporting today? */
- //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
- // etc etc
-
/* Set result defaults. */
dres.whatNext = Dis_Continue;
dres.len = 2;
@@ -21921,12 +22126,12 @@
}
/* ----------------------------------------------------------- */
- /* -- NEON instructions (in Thumb mode) -- */
+ /* -- NEON instructions (only v7 and below, in Thumb mode) -- */
/* ----------------------------------------------------------- */
if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
- Bool ok_neon = decode_NEON_instruction(
+ Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
&dres, insn32, condT, True/*isT*/
);
if (ok_neon)
@@ -21947,6 +22152,23 @@
}
/* ----------------------------------------------------------- */
+ /* -- v8 instructions (in Thumb mode) -- */
+ /* ----------------------------------------------------------- */
+
+ /* If we get here, it means that all attempts to decode the
+ instruction as ARMv7 or earlier have failed. So, if we're doing
+ ARMv8 or later, here is the point to try for it. */
+
+ if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
+ UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
+ Bool ok_v8
+ = decode_V8_instruction( &dres, insn32, condT, True/*isT*/,
+ old_itstate, new_itstate );
+ if (ok_v8)
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
/* -- Undecodable -- */
/* ----------------------------------------------------------- */
|
|
From: <sv...@va...> - 2016-08-03 11:53:19
|
Author: sewardj
Date: Wed Aug 3 12:53:11 2016
New Revision: 3235
Log:
arm32 backend stuff needed to support IR artefacts resulting from
guest support of 32-bit V8 crypto instructions:
* add new pseudo-instruction ARMin_VXferQ, to move values between
two D regs and a Q reg, in either direction. Use this to implement
Iop_64HLtoV128 much more efficiently than before, and to implement
Iop_V128HIto64 and Iop_V128to64.
* Generate code for helper calls which have four or more
(32-bit) word-sized arguments and a V128 return value.
These require passing arguments on the stack.
Modified:
trunk/priv/host_arm_defs.c
trunk/priv/host_arm_defs.h
trunk/priv/host_arm_isel.c
Modified: trunk/priv/host_arm_defs.c
==============================================================================
--- trunk/priv/host_arm_defs.c (original)
+++ trunk/priv/host_arm_defs.c Wed Aug 3 12:53:11 2016
@@ -1329,6 +1329,15 @@
i->ARMin.VCvtSD.src = src;
return i;
}
+ARMInstr* ARMInstr_VXferQ ( Bool toQ, HReg qD, HReg dHi, HReg dLo ) {
+ ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
+ i->tag = ARMin_VXferQ;
+ i->ARMin.VXferQ.toQ = toQ;
+ i->ARMin.VXferQ.qD = qD;
+ i->ARMin.VXferQ.dHi = dHi;
+ i->ARMin.VXferQ.dLo = dLo;
+ return i;
+}
ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
i->tag = ARMin_VXferD;
@@ -1800,6 +1809,29 @@
vex_printf(", ");
ppHRegARM(i->ARMin.VCvtSD.src);
return;
+ case ARMin_VXferQ:
+ if (i->ARMin.VXferQ.toQ) {
+ vex_printf("vmov ");
+ ppHRegARM(i->ARMin.VXferQ.qD);
+ vex_printf("-lo64, ");
+ ppHRegARM(i->ARMin.VXferQ.dLo);
+ vex_printf(" ; vmov ");
+ ppHRegARM(i->ARMin.VXferQ.qD);
+ vex_printf("-hi64, ");
+ ppHRegARM(i->ARMin.VXferQ.dHi);
+ } else {
+ vex_printf("vmov ");
+ ppHRegARM(i->ARMin.VXferQ.dLo);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VXferQ.qD);
+ vex_printf("-lo64");
+ vex_printf(" ; vmov ");
+ ppHRegARM(i->ARMin.VXferQ.dHi);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VXferQ.qD);
+ vex_printf("-hi64");
+ }
+ return;
case ARMin_VXferD:
vex_printf("vmov ");
if (i->ARMin.VXferD.toD) {
@@ -2201,6 +2233,17 @@
addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
addHRegUse(u, HRmRead, i->ARMin.VCvtSD.src);
return;
+ case ARMin_VXferQ:
+ if (i->ARMin.VXferQ.toQ) {
+ addHRegUse(u, HRmWrite, i->ARMin.VXferQ.qD);
+ addHRegUse(u, HRmRead, i->ARMin.VXferQ.dHi);
+ addHRegUse(u, HRmRead, i->ARMin.VXferQ.dLo);
+ } else {
+ addHRegUse(u, HRmRead, i->ARMin.VXferQ.qD);
+ addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dHi);
+ addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dLo);
+ }
+ return;
case ARMin_VXferD:
if (i->ARMin.VXferD.toD) {
addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
@@ -2422,6 +2465,11 @@
i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
return;
+ case ARMin_VXferQ:
+ i->ARMin.VXferQ.qD = lookupHRegRemap(m, i->ARMin.VXferQ.qD);
+ i->ARMin.VXferQ.dHi = lookupHRegRemap(m, i->ARMin.VXferQ.dHi);
+ i->ARMin.VXferQ.dLo = lookupHRegRemap(m, i->ARMin.VXferQ.dLo);
+ return;
case ARMin_VXferD:
i->ARMin.VXferD.dD = lookupHRegRemap(m, i->ARMin.VXferD.dD);
i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
@@ -3682,6 +3730,46 @@
goto done;
}
}
+ case ARMin_VXferQ: {
+ UInt insn;
+ UInt qD = qregEnc(i->ARMin.VXferQ.qD);
+ UInt dHi = dregEnc(i->ARMin.VXferQ.dHi);
+ UInt dLo = dregEnc(i->ARMin.VXferQ.dLo);
+ /* This is a bit tricky. We need to make 2 D-D moves and we rely
+ on the fact that the Q register can be treated as two D registers.
+ We also rely on the fact that the register allocator will allocate
+ the two D's and the Q to disjoint parts of the register file,
+ and so we don't have to worry about the first move's destination
+ being the same as the second move's source, etc. We do have
+ assertions though. */
+ /* The ARM ARM specifies that
+ D<2n> maps to the least significant half of Q<n>
+ D<2n+1> maps to the most significant half of Q<n>
+ So there are no issues with endianness here.
+ */
+ UInt qDlo = 2 * qD + 0;
+ UInt qDhi = 2 * qD + 1;
+ /* Stay sane .. */
+ vassert(qDhi != dHi && qDhi != dLo);
+ vassert(qDlo != dHi && qDlo != dLo);
+ /* vmov dX, dY is
+ F 2 (0,dX[4],1,0) dY[3:0] dX[3:0] 1 (dY[4],0,dY[4],1) dY[3:0]
+ */
+# define VMOV_D_D(_xx,_yy) \
+ XXXXXXXX( 0xF, 0x2, BITS4(0, (((_xx) >> 4) & 1), 1, 0), \
+ ((_yy) & 0xF), ((_xx) & 0xF), 0x1, \
+ BITS4( (((_yy) >> 4) & 1), 0, (((_yy) >> 4) & 1), 1), \
+ ((_yy) & 0xF) )
+ if (i->ARMin.VXferQ.toQ) {
+ insn = VMOV_D_D(qDlo, dLo); *p++ = insn;
+ insn = VMOV_D_D(qDhi, dHi); *p++ = insn;
+ } else {
+ insn = VMOV_D_D(dLo, qDlo); *p++ = insn;
+ insn = VMOV_D_D(dHi, qDhi); *p++ = insn;
+ }
+# undef VMOV_D_D
+ goto done;
+ }
case ARMin_VXferD: {
UInt dD = dregEnc(i->ARMin.VXferD.dD);
UInt rHi = iregEnc(i->ARMin.VXferD.rHi);
Modified: trunk/priv/host_arm_defs.h
==============================================================================
--- trunk/priv/host_arm_defs.h (original)
+++ trunk/priv/host_arm_defs.h Wed Aug 3 12:53:11 2016
@@ -591,6 +591,7 @@
ARMin_VCMovD,
ARMin_VCMovS,
ARMin_VCvtSD,
+ ARMin_VXferQ,
ARMin_VXferD,
ARMin_VXferS,
ARMin_VCvtID,
@@ -824,6 +825,13 @@
HReg dst;
HReg src;
} VCvtSD;
+ /* Transfer a NEON Q reg to/from two D registers (VMOV x 2) */
+ struct {
+ Bool toQ;
+ HReg qD;
+ HReg dHi;
+ HReg dLo;
+ } VXferQ;
/* Transfer a VFP D reg to/from two integer registers (VMOV) */
struct {
Bool toD;
@@ -994,6 +1002,7 @@
extern ARMInstr* ARMInstr_VCMovD ( ARMCondCode, HReg dst, HReg src );
extern ARMInstr* ARMInstr_VCMovS ( ARMCondCode, HReg dst, HReg src );
extern ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src );
+extern ARMInstr* ARMInstr_VXferQ ( Bool toQ, HReg qD, HReg dHi, HReg dLo );
extern ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo );
extern ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo );
extern ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
Modified: trunk/priv/host_arm_isel.c
==============================================================================
--- trunk/priv/host_arm_isel.c (original)
+++ trunk/priv/host_arm_isel.c Wed Aug 3 12:53:11 2016
@@ -368,6 +368,134 @@
}
+static
+Bool doHelperCallWithArgsOnStack ( /*OUT*/UInt* stackAdjustAfterCall,
+ /*OUT*/RetLoc* retloc,
+ ISelEnv* env,
+ IRExpr* guard,
+ IRCallee* cee, IRType retTy, IRExpr** args )
+{
+ /* This function deals just with the case where the arg sequence is:
+ VECRET followed by between 4 and 12 Ity_I32 values. So far no other
+ cases are necessary or supported. */
+
+ /* Check this matches the required format. */
+ if (args[0] == NULL || args[0]->tag != Iex_VECRET)
+ goto no_match;
+
+ UInt i;
+ UInt n_real_args = 0;
+ for (i = 1; args[i]; i++) {
+ IRExpr* arg = args[i];
+ if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)))
+ goto no_match;
+ IRType argTy = typeOfIRExpr(env->type_env, arg);
+ if (UNLIKELY(argTy != Ity_I32))
+ goto no_match;
+ n_real_args++;
+ }
+
+ /* We expect to pass at least some args on the stack. */
+ if (n_real_args <= 3)
+ goto no_match;
+
+ /* But not too many. */
+ if (n_real_args > 12)
+ goto no_match;
+
+ /* General rules for a call:
+
+ Args 1 .. 4 go in R0 .. R3. The rest are pushed R to L on the
+ stack; that is, arg 5 is at the lowest address, arg 6 at the
+ next lowest, etc.
+
+ The stack is to be kept 8 aligned.
+
+ It appears (for unclear reasons) that the highest 3 words made
+ available when moving SP downwards are not to be used. For
+ example, if 5 args are to go on the stack, then SP must be moved
+ down 32 bytes, and the area at SP+20 .. SP+31 is not to be used
+ by the caller.
+ */
+
+ /* For this particular case, we use the following layout:
+
+ ------ original SP
+ 112 bytes
+ ------
+ return value
+ ------ original SP - 128
+ space
+ args words, between 1 and 11
+ ------ new SP = original_SP - 256
+
+ Using 256 bytes is overkill, but it is simple and good enough.
+ */
+
+ /* This should really be
+ HReg argVRegs[n_real_args];
+ but that makes it impossible to do 'goto's forward past.
+ Hence the following kludge. */
+ vassert(n_real_args <= 11);
+ HReg argVRegs[11];
+ for (i = 0; i < 11; i++)
+ argVRegs[i] = INVALID_HREG;
+
+ /* Compute args into vregs. */
+ for (i = 0; i < n_real_args; i++) {
+ argVRegs[i] = iselIntExpr_R(env, args[i+1]);
+ }
+
+ /* Now we can compute the condition. We can't do it earlier
+ because the argument computations could trash the condition
+ codes. Be a bit clever to handle the common case where the
+ guard is 1:Bit. */
+ ARMCondCode cc = ARMcc_AL;
+ if (guard) {
+ if (guard->tag == Iex_Const
+ && guard->Iex.Const.con->tag == Ico_U1
+ && guard->Iex.Const.con->Ico.U1 == True) {
+ /* unconditional -- do nothing */
+ } else {
+ goto no_match; //ATC
+ cc = iselCondCode( env, guard );
+ }
+ }
+
+ HReg r0 = hregARM_R0();
+ HReg sp = hregARM_R13();
+
+ ARMRI84* c256 = ARMRI84_I84(64, 15); // 64 `ror` (15 * 2)
+
+ addInstr(env, ARMInstr_Alu(ARMalu_SUB, r0, sp, ARMRI84_I84(128, 0)));
+
+ addInstr(env, mk_iMOVds_RR(hregARM_R1(), argVRegs[0]));
+ addInstr(env, mk_iMOVds_RR(hregARM_R2(), argVRegs[1]));
+ addInstr(env, mk_iMOVds_RR(hregARM_R3(), argVRegs[2]));
+
+ addInstr(env, ARMInstr_Alu(ARMalu_SUB, sp, sp, c256));
+
+ for (i = 3; i < n_real_args; i++) {
+ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/, argVRegs[i],
+ ARMAMode1_RI(sp, (i-3) * 4)));
+ }
+
+ vassert(*stackAdjustAfterCall == 0);
+ vassert(is_RetLoc_INVALID(*retloc));
+
+ *stackAdjustAfterCall = 256;
+ *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 128);
+
+ Addr32 target = (Addr)cee->addr;
+ addInstr(env, ARMInstr_Call( cc, target, 4, *retloc ));
+
+ return True; /* success */
+
+ no_match:
+ return False;
+}
+
+
/* Do a complete function call. |guard| is a Ity_Bit expression
indicating whether or not the call happens. If guard==NULL, the
call is unconditional. |retloc| is set to indicate where the
@@ -470,6 +598,21 @@
n_args++;
}
+ /* If there are more than 4 args, we are going to have to pass
+ some via memory. Use a different function to (possibly) deal with
+ that; dealing with it here is too complex. */
+ if (n_args > ARM_N_ARGREGS) {
+ return doHelperCallWithArgsOnStack(stackAdjustAfterCall, retloc,
+ env, guard, cee, retTy, args );
+
+ }
+
+ /* After this point we make no attempt to pass args on the stack,
+ and just give up if that case (which is OK because it never
+ happens). Even if there are for example only 3 args, it might
+ still be necessary to pass some of them on the stack if for example
+ two or more of them are 64-bit integers. */
+
argregs[0] = hregARM_R0();
argregs[1] = hregARM_R1();
argregs[2] = hregARM_R2();
@@ -653,30 +796,30 @@
vassert(*stackAdjustAfterCall == 0);
vassert(is_RetLoc_INVALID(*retloc));
switch (retTy) {
- case Ity_INVALID:
- /* Function doesn't return a value. */
- *retloc = mk_RetLoc_simple(RLPri_None);
- break;
- case Ity_I64:
- *retloc = mk_RetLoc_simple(RLPri_2Int);
- break;
- case Ity_I32: case Ity_I16: case Ity_I8:
- *retloc = mk_RetLoc_simple(RLPri_Int);
- break;
- case Ity_V128:
- vassert(0); // ATC
- *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
- *stackAdjustAfterCall = 16;
- break;
- case Ity_V256:
- vassert(0); // ATC
- *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
- *stackAdjustAfterCall = 32;
- break;
- default:
- /* IR can denote other possible return types, but we don't
- handle those here. */
- vassert(0);
+ case Ity_INVALID:
+ /* Function doesn't return a value. */
+ *retloc = mk_RetLoc_simple(RLPri_None);
+ break;
+ case Ity_I64:
+ *retloc = mk_RetLoc_simple(RLPri_2Int);
+ break;
+ case Ity_I32: case Ity_I16: case Ity_I8:
+ *retloc = mk_RetLoc_simple(RLPri_Int);
+ break;
+ case Ity_V128:
+ vassert(0); // ATC
+ *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
+ *stackAdjustAfterCall = 16;
+ break;
+ case Ity_V256:
+ vassert(0); // ATC
+ *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
+ *stackAdjustAfterCall = 32;
+ break;
+ default:
+ /* IR can denote other possible return types, but we don't
+ handle those here. */
+ vassert(0);
}
/* Finally, generate the call itself. This needs the *retloc value
@@ -3714,6 +3857,14 @@
res, arg, 0, False));
return res;
}
+ case Iop_V128to64:
+ case Iop_V128HIto64: {
+ HReg src = iselNeonExpr(env, e->Iex.Unop.arg);
+ HReg resLo = newVRegD(env);
+ HReg resHi = newVRegD(env);
+ addInstr(env, ARMInstr_VXferQ(False/*!toQ*/, src, resHi, resLo));
+ return e->Iex.Unop.op == Iop_V128HIto64 ? resHi : resLo;
+ }
default:
break;
}
@@ -4305,7 +4456,7 @@
if (e->tag == Iex_Binop) {
switch (e->Iex.Binop.op) {
- case Iop_64HLtoV128:
+ case Iop_64HLtoV128: {
/* Try to match into single "VMOV reg, imm" instruction */
if (e->Iex.Binop.arg1->tag == Iex_Const &&
e->Iex.Binop.arg2->tag == Iex_Const &&
@@ -4349,45 +4500,12 @@
}
/* Does not match "VMOV Reg, Imm" form. We'll have to do
it the slow way. */
- {
- /* local scope */
- /* Done via the stack for ease of use. */
- /* FIXME: assumes little endian host */
- HReg w3, w2, w1, w0;
- HReg res = newVRegV(env);
- ARMAMode1* sp_0 = ARMAMode1_RI(hregARM_R13(), 0);
- ARMAMode1* sp_4 = ARMAMode1_RI(hregARM_R13(), 4);
- ARMAMode1* sp_8 = ARMAMode1_RI(hregARM_R13(), 8);
- ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
- ARMRI84* c_16 = ARMRI84_I84(16,0);
- /* Make space for SP */
- addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
- hregARM_R13(), c_16));
-
- /* Store the less significant 64 bits */
- iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
- addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
- w0, sp_0));
- addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
- w1, sp_4));
-
- /* Store the more significant 64 bits */
- iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
- addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
- w2, sp_8));
- addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
- w3, sp_12));
-
- /* Load result back from stack. */
- addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
- mkARMAModeN_R(hregARM_R13())));
-
- /* Restore SP */
- addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
- hregARM_R13(), c_16));
- return res;
- } /* local scope */
- goto neon_expr_bad;
+ HReg dHi = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg dLo = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ HReg res = newVRegV(env);
+ addInstr(env, ARMInstr_VXferQ(True/*toQ*/, res, dHi, dLo));
+ return res;
+ }
case Iop_AndV128: {
HReg res = newVRegV(env);
HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
@@ -5359,7 +5477,7 @@
return dst;
}
- neon_expr_bad:
+ /* neon_expr_bad: */
ppIRExpr(e);
vpanic("iselNeonExpr_wrk");
}
@@ -5974,7 +6092,7 @@
switch (retty) {
case Ity_INVALID: /* function doesn't return anything */
case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
- //case Ity_V128: //ATC
+ case Ity_V128:
retty_ok = True; break;
default:
break;
@@ -5987,7 +6105,9 @@
call is skipped. */
UInt addToSp = 0;
RetLoc rloc = mk_RetLoc_INVALID();
- doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
+ Bool ok = doHelperCall( &addToSp, &rloc, env,
+ d->guard, d->cee, retty, d->args );
+ if (!ok) goto stmt_fail;
vassert(is_sane_RetLoc(rloc));
/* Now figure out what to do with the returned value, if any. */
@@ -6026,11 +6146,6 @@
return;
}
case Ity_V128: {
- vassert(0); // ATC. The code that this produces really
- // needs to be looked at, to verify correctness.
- // I don't think this can ever happen though, since the
- // ARM front end never produces 128-bit loads/stores.
- // Hence the following is mostly theoretical.
/* The returned value is on the stack, and *retloc tells
us where. Fish it off the stack and then move the
stack pointer upwards to clear it, as directed by
@@ -6038,16 +6153,26 @@
vassert(rloc.pri == RLPri_V128SpRel);
vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it
vassert(addToSp >= 16);
- vassert(addToSp < 256); // ditto reason as for rloc.spOff
+ vassert(addToSp <= 256);
+ /* Both the stack delta and the offset must be at least 8-aligned.
+ If that isn't so, doHelperCall() has generated bad code. */
+ vassert(0 == (rloc.spOff % 8));
+ vassert(0 == (addToSp % 8));
HReg dst = lookupIRTemp(env, d->tmp);
HReg tmp = newVRegI(env);
- HReg r13 = hregARM_R13(); // sp
+ HReg sp = hregARM_R13();
addInstr(env, ARMInstr_Alu(ARMalu_ADD,
- tmp, r13, ARMRI84_I84(rloc.spOff,0)));
+ tmp, sp, ARMRI84_I84(rloc.spOff,0)));
ARMAModeN* am = mkARMAModeN_R(tmp);
+ /* This load could be done with its effective address 0 % 8,
+ because that's the best stack alignment that we can be
+ assured of. */
addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am));
- addInstr(env, ARMInstr_Alu(ARMalu_ADD,
- r13, r13, ARMRI84_I84(addToSp,0)));
+
+ ARMRI84* spAdj
+ = addToSp == 256 ? ARMRI84_I84(64, 15) // 64 `ror` (15 * 2)
+ : ARMRI84_I84(addToSp, 0);
+ addInstr(env, ARMInstr_Alu(ARMalu_ADD, sp, sp, spAdj));
return;
}
default:
|
|
From: <sv...@va...> - 2016-08-03 11:44:09
|
Author: sewardj
Date: Wed Aug 3 12:44:02 2016
New Revision: 15922
Log:
Add test cases for v8 crypto instructions in 32-bit mode. Is not yet connected
to the build/test system.
Added:
trunk/none/tests/arm/v8crypto.c
Added: trunk/none/tests/arm/v8crypto.c
==============================================================================
--- trunk/none/tests/arm/v8crypto.c (added)
+++ trunk/none/tests/arm/v8crypto.c Wed Aug 3 12:44:02 2016
@@ -0,0 +1,250 @@
+
+/*
+gcc -o v8crypto v8crypto.c -march=armv8-a -mfpu=crypto-neon-fp-armv8
+gcc -o v8crypto v8crypto.c -mfpu=crypto-neon-fp-armv8
+*/
+
+#include <stdio.h>
+#include <assert.h>
+#include <malloc.h> // memalign
+#include <string.h> // memset
+#include "tests/malloc.h"
+#include <math.h> // isnormal
+
+typedef unsigned char UChar;
+typedef unsigned short int UShort;
+typedef unsigned int UInt;
+typedef signed int Int;
+typedef unsigned char UChar;
+typedef unsigned long long int ULong;
+typedef signed long long int Long;
+typedef double Double;
+typedef float Float;
+
+typedef unsigned char Bool;
+#define False ((Bool)0)
+#define True ((Bool)1)
+
+
+#define ITERS 1
+
+typedef
+ enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE }
+ LaneTy;
+
+union _V128 {
+ UChar u8[16];
+ UShort u16[8];
+ UInt u32[4];
+ ULong u64[2];
+ Float f32[4];
+ Double f64[2];
+};
+typedef union _V128 V128;
+
+static inline UChar randUChar ( void )
+{
+ static UInt seed = 80021;
+ seed = 1103515245 * seed + 12345;
+ return (seed >> 17) & 0xFF;
+}
+
+static ULong randULong ( LaneTy ty )
+{
+ Int i;
+ ULong r = 0;
+ for (i = 0; i < 8; i++) {
+ r = (r << 8) | (ULong)(0xFF & randUChar());
+ }
+ return r;
+}
+
+/* Generates a random V128. Ensures that it contains normalised
+ FP numbers when viewed as either F32x4 or F64x2, so that it is
+ reasonable to use in FP test cases. */
+static void randV128 ( /*OUT*/V128* v, LaneTy ty )
+{
+ static UInt nCalls = 0, nIters = 0;
+ Int i;
+ nCalls++;
+ while (1) {
+ nIters++;
+ for (i = 0; i < 16; i++) {
+ v->u8[i] = randUChar();
+ }
+ if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2])
+ && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1]))
+ break;
+ }
+ if (0 == (nCalls & 0xFF))
+ printf("randV128: %u calls, %u iters\n", nCalls, nIters);
+}
+
+static void showV128 ( V128* v )
+{
+ Int i;
+ for (i = 15; i >= 0; i--)
+ printf("%02x", (Int)v->u8[i]);
+}
+
+static void showBlock ( const char* msg, V128* block, Int nBlock )
+{
+ Int i;
+ printf("%s\n", msg);
+ for (i = 0; i < nBlock; i++) {
+ printf(" ");
+ showV128(&block[i]);
+ printf("\n");
+ }
+}
+
+
+/* ---------------------------------------------------------------- */
+/* -- Parameterisable test macros -- */
+/* ---------------------------------------------------------------- */
+
+#define DO50(_action) \
+ do { \
+ Int _qq; for (_qq = 0; _qq < 50; _qq++) { _action ; } \
+ } while (0)
+
+
+/* Generate a test that involves two vector regs,
+ with no bias as to which is input or output.
+ It's OK to use r8 as scratch.*/
+#define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
+ __attribute__((noinline)) \
+ static void test_##TESTNAME ( LaneTy ty ) { \
+ Int i; \
+ for (i = 0; i < ITERS; i++) { \
+ V128 block[4+1]; \
+ memset(block, 0x55, sizeof(block)); \
+ randV128(&block[0], ty); \
+ randV128(&block[1], ty); \
+ randV128(&block[2], ty); \
+ randV128(&block[3], ty); \
+ __asm__ __volatile__( \
+ "mov r9, #0 ; vmsr fpscr, r9 ; " \
+ "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \
+ "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \
+ INSN " ; " \
+ "add r9, %0, #32 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \
+ "add r9, %0, #48 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \
+ "vmrs r9, fpscr ; str r9, [%0, #64] " \
+ : : "r"(&block[0]) \
+ : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "r8", "r9" \
+ ); \
+ printf(INSN " "); \
+ UInt fpscr = 0xFFFFFFFF & block[4].u32[0]; \
+ showV128(&block[0]); printf(" "); \
+ showV128(&block[1]); printf(" "); \
+ showV128(&block[2]); printf(" "); \
+ showV128(&block[3]); printf(" fpscr=%08x\n", fpscr); \
+ } \
+ }
+
+
+/* Generate a test that involves three vector regs,
+ with no bias as to which is input or output. It's also OK
+ to use r8 scratch. */
+#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \
+ __attribute__((noinline)) \
+ static void test_##TESTNAME ( LaneTy ty ) { \
+ Int i; \
+ for (i = 0; i < ITERS; i++) { \
+ V128 block[6+1]; \
+ memset(block, 0x55, sizeof(block)); \
+ randV128(&block[0], ty); \
+ randV128(&block[1], ty); \
+ randV128(&block[2], ty); \
+ randV128(&block[3], ty); \
+ randV128(&block[4], ty); \
+ randV128(&block[5], ty); \
+ __asm__ __volatile__( \
+ "mov r9, #0 ; vmsr fpscr, r9 ; " \
+ "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \
+ "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \
+ "add r9, %0, #32 ; vld1.8 { q"#VECREG3NO" }, [r9] ; " \
+ INSN " ; " \
+ "add r9, %0, #48 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \
+ "add r9, %0, #64 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \
+ "add r9, %0, #80 ; vst1.8 { q"#VECREG3NO" }, [r9] ; " \
+ "vmrs r9, fpscr ; str r9, [%0, #96] " \
+ : : "r"(&block[0]) \
+ : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "q"#VECREG3NO, \
+ "r8", "r9" \
+ ); \
+ printf(INSN " "); \
+ UInt fpscr = 0xFFFFFFFF & block[6].u32[0]; \
+ showV128(&block[0]); printf(" "); \
+ showV128(&block[1]); printf(" "); \
+ showV128(&block[2]); printf(" "); \
+ showV128(&block[3]); printf(" "); \
+ showV128(&block[4]); printf(" "); \
+ showV128(&block[5]); printf(" fpscr=%08x\n", fpscr); \
+ } \
+ }
+
+// ======================== CRYPTO ========================
+
+GEN_TWOVEC_TEST(aesd_q_q, "aesd.8 q3, q4", 3, 4)
+GEN_TWOVEC_TEST(aese_q_q, "aese.8 q12, q13", 12, 13)
+GEN_TWOVEC_TEST(aesimc_q_q, "aesimc.8 q15, q0", 15, 0)
+GEN_TWOVEC_TEST(aesmc_q_q, "aesmc.8 q1, q9", 1, 9)
+
+GEN_THREEVEC_TEST(sha1c_q_q_q, "sha1c.32 q11, q10, q2", 11, 10, 2)
+GEN_TWOVEC_TEST(sha1h_q_q, "sha1h.32 q6, q7", 6, 7)
+GEN_THREEVEC_TEST(sha1m_q_q_q, "sha1m.32 q2, q8, q13", 2, 8, 13)
+GEN_THREEVEC_TEST(sha1p_q_q_q, "sha1p.32 q3, q9, q14", 3, 9, 14)
+GEN_THREEVEC_TEST(sha1su0_q_q_q, "sha1su0.32 q4, q10, q15", 4, 10, 15)
+GEN_TWOVEC_TEST(sha1su1_q_q, "sha1su1.32 q11, q2", 11, 2)
+
+GEN_THREEVEC_TEST(sha256h2_q_q_q, "sha256h2.32 q9, q8, q7", 9, 8, 7)
+GEN_THREEVEC_TEST(sha256h_q_q_q, "sha256h.32 q10, q9, q8", 10, 9, 8)
+GEN_TWOVEC_TEST(sha256su0_q_q, "sha256su0.32 q11, q10", 11, 10)
+GEN_THREEVEC_TEST(sha256su1_q_q_q, "sha256su1.32 q12, q11, q10", 12, 11, 10)
+
+// This is a bit complex.
+//GEN_THREEVEC_TEST(pmull_q_d_d, 1q, 1d, 1d)
+
+int main ( void )
+{
+ // ======================== CRYPTO ========================
+
+ // aesd.8 q_q (aes single round decryption)
+ // aese.8 q_q (aes single round encryption)
+ // aesimc.8 q_q (aes inverse mix columns)
+ // aesmc.8 q_q (aes mix columns)
+ if (1) DO50( test_aesd_q_q(TyNONE) );
+ if (1) DO50( test_aese_q_q(TyNONE) );
+ if (1) DO50( test_aesimc_q_q(TyNONE) );
+ if (1) DO50( test_aesmc_q_q(TyNONE) );
+
+#if 0
+ // sha1c.32 q_q_q
+ // sha1h.32 q_q
+ // sha1m.32 q_q_q
+ // sha1p.32 q_q_q
+ // sha1su0.32 q_q_q
+ // sha1su1.32 q_q
+ if (1) DO50( test_sha1c_q_q_q(TyNONE) );
+ if (1) DO50( test_sha1h_q_q(TyNONE) );
+ if (1) DO50( test_sha1m_q_q_q(TyNONE) );
+ if (1) DO50( test_sha1p_q_q_q(TyNONE) );
+ if (1) DO50( test_sha1su0_q_q_q(TyNONE) );
+ if (1) DO50( test_sha1su1_q_q(TyNONE) );
+
+ // sha256h2.32 q_q_q
+ // sha256h.32 q_q_q
+ // sha256su0.32 q_q
+ // sha256su1.32 q_q_q
+ if (1) DO50( test_sha256h2_q_q_q(TyNONE) );
+ if (1) DO50( test_sha256h_q_q_q(TyNONE) );
+ if (1) DO50( test_sha256su0_q_q(TyNONE) );
+ if (1) DO50( test_sha256su1_q_q_q(TyNONE) );
+
+ // vmull.64 q_d_d
+ if (1) test_pmull_q_d_d(TyD);
+#endif
+ return 0;
+}
|
|
From: <sv...@va...> - 2016-08-03 11:41:30
|
Author: sewardj
Date: Wed Aug 3 12:41:24 2016
New Revision: 3234
Log:
Add infrastructure for detection of 32-bit ARMv8 capable CPUs (VEX side).
Modified:
trunk/priv/main_main.c
Modified: trunk/priv/main_main.c
==============================================================================
--- trunk/priv/main_main.c (original)
+++ trunk/priv/main_main.c Wed Aug 3 12:41:24 2016
@@ -1933,8 +1933,8 @@
case VexArchARM: {
Bool NEON = ((hwcaps & VEX_HWCAPS_ARM_NEON) != 0);
+ Bool VFP3 = ((hwcaps & VEX_HWCAPS_ARM_VFP3) != 0);
UInt level = VEX_ARM_ARCHLEVEL(hwcaps);
-
switch (level) {
case 5:
if (NEON)
@@ -1948,6 +1948,11 @@
return;
case 7:
return;
+ case 8:
+ if (!NEON || !VFP3)
+ invalid_hwcaps(arch, hwcaps,
+ "NEON and VFP3 are required for ARMv8.\n");
+ return;
default:
invalid_hwcaps(arch, hwcaps,
"ARM architecture level is not supported.\n");
|
|
From: <sv...@va...> - 2016-08-03 11:40:45
|
Author: sewardj
Date: Wed Aug 3 12:40:36 2016
New Revision: 15921
Log:
Add infrastructure for detection of 32-bit ARMv8 capable CPUs (Valgrind side).
Modified:
trunk/coregrind/m_initimg/initimg-linux.c
trunk/coregrind/m_machine.c
Modified: trunk/coregrind/m_initimg/initimg-linux.c
==============================================================================
--- trunk/coregrind/m_initimg/initimg-linux.c (original)
+++ trunk/coregrind/m_initimg/initimg-linux.c Wed Aug 3 12:40:36 2016
@@ -691,8 +691,9 @@
"ARM has-neon from-auxv: %s\n",
has_neon ? "YES" : "NO");
VG_(machine_arm_set_has_NEON)( has_neon );
- #define VKI_HWCAP_TLS 32768
+# define VKI_HWCAP_TLS 32768
Bool has_tls = (auxv->u.a_val & VKI_HWCAP_TLS) > 0;
+# undef VKI_HWCAP_TLS
VG_(debugLog)(2, "initimg",
"ARM has-tls from-auxv: %s\n",
has_tls ? "YES" : "NO");
Modified: trunk/coregrind/m_machine.c
==============================================================================
--- trunk/coregrind/m_machine.c (original)
+++ trunk/coregrind/m_machine.c Wed Aug 3 12:40:36 2016
@@ -1523,7 +1523,7 @@
vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
- volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
+ volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
volatile Int archlevel;
Int r;
@@ -1602,6 +1602,19 @@
}
}
+ /* ARMv8 insns */
+ have_V8 = True;
+ if (archlevel == 7) {
+ if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
+ have_V8 = False;
+ } else {
+ __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
+ }
+ if (have_V8 && have_NEON && have_VFP3) {
+ archlevel = 8;
+ }
+ }
+
VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
|