|
From: <sv...@va...> - 2014-01-12 12:49:33
|
Author: sewardj
Date: Sun Jan 12 12:49:10 2014
New Revision: 2803
Log:
Add support for ARMv8 AArch64 (the 64 bit ARM instruction set):
integer and FP instructions.
Added:
trunk/priv/guest_arm64_defs.h
trunk/priv/guest_arm64_helpers.c
trunk/priv/guest_arm64_toIR.c
trunk/priv/host_arm64_defs.c
trunk/priv/host_arm64_defs.h
trunk/priv/host_arm64_isel.c
trunk/pub/libvex_guest_arm64.h
Modified:
trunk/Makefile-gcc
trunk/auxprogs/genoffsets.c
trunk/priv/guest_s390_helpers.c
trunk/priv/host_generic_simd64.c
trunk/priv/host_generic_simd64.h
trunk/priv/ir_opt.c
trunk/priv/main_main.c
trunk/pub/libvex.h
trunk/pub/libvex_basictypes.h
trunk/pub/libvex_guest_s390x.h
trunk/pub/libvex_ir.h
trunk/switchback/Makefile
trunk/switchback/linker.c
trunk/switchback/switchback.c
Modified: trunk/Makefile-gcc
==============================================================================
--- trunk/Makefile-gcc (original)
+++ trunk/Makefile-gcc Sun Jan 12 12:49:10 2014
@@ -50,12 +50,14 @@
priv/host_x86_defs.o \
priv/host_amd64_defs.o \
priv/host_arm_defs.o \
+ priv/host_arm64_defs.o \
priv/host_ppc_defs.o \
priv/host_s390_defs.o \
priv/host_mips_defs.o \
priv/host_x86_isel.o \
priv/host_amd64_isel.o \
priv/host_arm_isel.o \
+ priv/host_arm64_isel.o \
priv/host_ppc_isel.o \
priv/host_s390_isel.o \
priv/host_mips_isel.o \
@@ -70,12 +72,14 @@
priv/guest_x86_helpers.o \
priv/guest_amd64_helpers.o \
priv/guest_arm_helpers.o \
+ priv/guest_arm64_helpers.o \
priv/guest_ppc_helpers.o \
priv/guest_s390_helpers.o \
priv/guest_mips_helpers.o \
priv/guest_x86_toIR.o \
priv/guest_amd64_toIR.o \
priv/guest_arm_toIR.o \
+ priv/guest_arm64_toIR.o \
priv/guest_ppc_toIR.o \
priv/guest_s390_toIR.o \
priv/guest_mips_toIR.o
@@ -194,6 +198,12 @@
if [ ! -f TAG-amd64-darwin ] ; then rm -f $(LIB_OBJS) TAG-* libvex.a ; fi
touch TAG-amd64-darwin
+libvex-arm64-linux.a: TAG-arm64-linux libvex.a
+ mv -f libvex.a libvex-arm64-linux.a
+TAG-arm64-linux:
+ if [ ! -f TAG-arm64-linux ] ; then rm -f $(LIB_OBJS) TAG-* libvex.a ; fi
+ touch TAG-arm64-linux
+
clean:
rm -f $(LIB_OBJS) *.a vex test_main.o TAG-* \
@@ -273,6 +283,10 @@
$(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_arm_defs.o \
-c priv/host_arm_defs.c
+priv/host_arm64_defs.o: $(ALL_HEADERS) priv/host_arm64_defs.c
+ $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_arm64_defs.o \
+ -c priv/host_arm64_defs.c
+
priv/host_ppc_defs.o: $(ALL_HEADERS) priv/host_ppc_defs.c
$(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_ppc_defs.o \
-c priv/host_ppc_defs.c
@@ -297,6 +311,10 @@
$(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_arm_isel.o \
-c priv/host_arm_isel.c
+priv/host_arm64_isel.o: $(ALL_HEADERS) priv/host_arm64_isel.c
+ $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_arm64_isel.o \
+ -c priv/host_arm64_isel.c
+
priv/host_ppc_isel.o: $(ALL_HEADERS) priv/host_ppc_isel.c
$(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_ppc_isel.o \
-c priv/host_ppc_isel.c
@@ -361,10 +379,18 @@
$(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_arm_helpers.o \
-c priv/guest_arm_helpers.c
+priv/guest_arm64_helpers.o: $(ALL_HEADERS) priv/guest_arm64_helpers.c
+ $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_arm64_helpers.o \
+ -c priv/guest_arm64_helpers.c
+
priv/guest_arm_toIR.o: $(ALL_HEADERS) priv/guest_arm_toIR.c
$(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_arm_toIR.o \
-c priv/guest_arm_toIR.c
+priv/guest_arm64_toIR.o: $(ALL_HEADERS) priv/guest_arm64_toIR.c
+ $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_arm64_toIR.o \
+ -c priv/guest_arm64_toIR.c
+
priv/guest_ppc_helpers.o: $(ALL_HEADERS) priv/guest_ppc_helpers.c
$(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_ppc_helpers.o \
-c priv/guest_ppc_helpers.c
Modified: trunk/auxprogs/genoffsets.c
==============================================================================
--- trunk/auxprogs/genoffsets.c (original)
+++ trunk/auxprogs/genoffsets.c Sun Jan 12 12:49:10 2014
@@ -51,6 +51,7 @@
#include "../pub/libvex_guest_ppc32.h"
#include "../pub/libvex_guest_ppc64.h"
#include "../pub/libvex_guest_arm.h"
+#include "../pub/libvex_guest_arm64.h"
#include "../pub/libvex_guest_s390x.h"
#include "../pub/libvex_guest_mips32.h"
#include "../pub/libvex_guest_mips64.h"
@@ -159,6 +160,19 @@
GENOFFSET(ARM,arm,R14);
GENOFFSET(ARM,arm,R15T);
+ // arm64
+ GENOFFSET(ARM64,arm64,X0);
+ GENOFFSET(ARM64,arm64,X1);
+ GENOFFSET(ARM64,arm64,X2);
+ GENOFFSET(ARM64,arm64,X3);
+ GENOFFSET(ARM64,arm64,X4);
+ GENOFFSET(ARM64,arm64,X5);
+ GENOFFSET(ARM64,arm64,X6);
+ GENOFFSET(ARM64,arm64,X7);
+ GENOFFSET(ARM64,arm64,X8);
+ GENOFFSET(ARM64,arm64,SP);
+ GENOFFSET(ARM64,arm64,PC);
+
// s390x
GENOFFSET(S390X,s390x,r2);
GENOFFSET(S390X,s390x,r3);
Added: trunk/priv/guest_arm64_defs.h
==============================================================================
--- trunk/priv/guest_arm64_defs.h (added)
+++ trunk/priv/guest_arm64_defs.h Sun Jan 12 12:49:10 2014
@@ -0,0 +1,244 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_arm64_defs.h ---*/
+/*---------------------------------------------------------------*/
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2013-2013 OpenWorks
+ in...@op...
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VEX_GUEST_ARM64_DEFS_H
+#define __VEX_GUEST_ARM64_DEFS_H
+
+#include "libvex_basictypes.h"
+#include "guest_generic_bb_to_IR.h" // DisResult
+
+/*---------------------------------------------------------*/
+/*--- arm64 to IR conversion ---*/
+/*---------------------------------------------------------*/
+
+/* Convert one ARM64 insn to IR. See the type DisOneInstrFn in
+ bb_to_IR.h. */
+extern
+DisResult disInstr_ARM64 ( IRSB* irbb,
+ Bool (*resteerOkFn) ( void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ UChar* guest_code,
+ Long delta,
+ Addr64 guest_IP,
+ VexArch guest_arch,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo,
+ Bool host_bigendian,
+ Bool sigill_diag );
+
+/* Used by the optimiser to specialise calls to helpers. */
+extern
+IRExpr* guest_arm64_spechelper ( const HChar* function_name,
+ IRExpr** args,
+ IRStmt** precedingStmts,
+ Int n_precedingStmts );
+
+/* Describes to the optimiser which parts of the guest state require
+ precise memory exceptions. This is logically part of the guest
+ state description. */
+extern
+Bool guest_arm64_state_requires_precise_mem_exns ( Int, Int );
+
+extern
+VexGuestLayout arm64Guest_layout;
+
+
+/*---------------------------------------------------------*/
+/*--- arm64 guest helpers ---*/
+/*---------------------------------------------------------*/
+
+/* --- CLEAN HELPERS --- */
+
+/* Calculate NZCV from the supplied thunk components, in the positions
+ they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
+ Returned bits 63:32 and 27:0 are zero. */
+extern
+ULong arm64g_calculate_flags_nzcv ( ULong cc_op, ULong cc_dep1,
+ ULong cc_dep2, ULong cc_dep3 );
+
+//ZZ /* Calculate the C flag from the thunk components, in the lowest bit
+//ZZ of the word (bit 0). */
+//ZZ extern
+//ZZ UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1,
+//ZZ UInt cc_dep2, UInt cc_dep3 );
+//ZZ
+//ZZ /* Calculate the V flag from the thunk components, in the lowest bit
+//ZZ of the word (bit 0). */
+//ZZ extern
+//ZZ UInt armg_calculate_flag_v ( UInt cc_op, UInt cc_dep1,
+//ZZ UInt cc_dep2, UInt cc_dep3 );
+//ZZ
+/* Calculate the specified condition from the thunk components, in the
+ lowest bit of the word (bit 0). */
+extern
+ULong arm64g_calculate_condition ( /* ARM64Condcode << 4 | cc_op */
+ ULong cond_n_op ,
+ ULong cc_dep1,
+ ULong cc_dep2, ULong cc_dep3 );
+
+//ZZ /* Calculate the QC flag from the thunk components, in the lowest bit
+//ZZ of the word (bit 0). */
+//ZZ extern
+//ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
+//ZZ UInt resR1, UInt resR2 );
+
+
+/*---------------------------------------------------------*/
+/*--- Condition code stuff ---*/
+/*---------------------------------------------------------*/
+
+/* Flag masks. Defines positions of flag bits in the NZCV
+ register. */
+#define ARM64G_CC_SHIFT_N 31
+#define ARM64G_CC_SHIFT_Z 30
+#define ARM64G_CC_SHIFT_C 29
+#define ARM64G_CC_SHIFT_V 28
+//ZZ #define ARMG_CC_SHIFT_Q 27
+//ZZ
+//ZZ #define ARMG_CC_MASK_N (1 << ARMG_CC_SHIFT_N)
+//ZZ #define ARMG_CC_MASK_Z (1 << ARMG_CC_SHIFT_Z)
+//ZZ #define ARMG_CC_MASK_C (1 << ARMG_CC_SHIFT_C)
+//ZZ #define ARMG_CC_MASK_V (1 << ARMG_CC_SHIFT_V)
+//ZZ #define ARMG_CC_MASK_Q (1 << ARMG_CC_SHIFT_Q)
+
+/* Flag thunk descriptors. A four-word thunk is used to record
+ details of the most recent flag-setting operation, so NZCV can
+ be computed later if needed.
+
+ The four words are:
+
+ CC_OP, which describes the operation.
+
+ CC_DEP1, CC_DEP2, CC_NDEP. These are arguments to the
+      operation.  We want to set up the mcx_masks in flag helper calls
+ involving these fields so that Memcheck "believes" that the
+ resulting flags are data-dependent on both CC_DEP1 and
+ CC_DEP2. Hence the name DEP.
+
+ When building the thunk, it is always necessary to write words into
+ CC_DEP1/2 and NDEP, even if those args are not used given the CC_OP
+ field. This is important because otherwise Memcheck could give
+ false positives as it does not understand the relationship between
+ the CC_OP field and CC_DEP1/2/NDEP, and so believes that the
+ definedness of the stored flags always depends on all 3 DEP values.
+
+ A summary of the field usages is:
+
+ OP DEP1 DEP2 DEP3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ OP_COPY curr_NZCV:28x0 unused unused
+ OP_ADD32 argL argR unused
+ OP_ADD64 argL argR unused
+ OP_SUB32 argL argR unused
+ OP_SUB64 argL argR unused
+//ZZ OP_ADC argL argR 31x0:old_C
+//ZZ OP_SBB argL argR 31x0:old_C
+ OP_LOGIC32 result unused unused
+ OP_LOGIC64 result unused unused
+//ZZ OP_MUL result unused 30x0:old_C:old_V
+//ZZ OP_MULL resLO32 resHI32 30x0:old_C:old_V
+//ZZ */
+
+enum {
+ ARM64G_CC_OP_COPY=0, /* DEP1 = NZCV in 31:28, DEP2 = 0, DEP3 = 0
+ just copy DEP1 to output */
+
+ ARM64G_CC_OP_ADD32, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op),
+ DEP3 = 0 */
+
+ ARM64G_CC_OP_ADD64, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op),
+ DEP3 = 0 */
+
+ ARM64G_CC_OP_SUB32, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op),
+ DEP3 = 0 */
+
+ ARM64G_CC_OP_SUB64, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op),
+ DEP3 = 0 */
+
+//ZZ ARMG_CC_OP_ADC, /* DEP1 = argL (Rn), DEP2 = arg2 (shifter_op),
+//ZZ DEP3 = oldC (in LSB) */
+//ZZ
+//ZZ ARMG_CC_OP_SBB, /* DEP1 = argL (Rn), DEP2 = arg2 (shifter_op),
+//ZZ DEP3 = oldC (in LSB) */
+
+ ARM64G_CC_OP_LOGIC32, /* DEP1 = result, DEP2 = 0, DEP3 = 0 */
+ ARM64G_CC_OP_LOGIC64, /* DEP1 = result, DEP2 = 0, DEP3 = 0 */
+
+//ZZ ARMG_CC_OP_MUL, /* DEP1 = result, DEP2 = 0, DEP3 = oldC:old_V
+//ZZ (in bits 1:0) */
+//ZZ
+//ZZ ARMG_CC_OP_MULL, /* DEP1 = resLO32, DEP2 = resHI32, DEP3 = oldC:old_V
+//ZZ (in bits 1:0) */
+
+ ARM64G_CC_OP_NUMBER
+};
+
+/* XXXX because of the calling conventions for
+ arm64g_calculate_condition, all these OP values MUST be in the range
+ 0 .. 15 only (viz, 4-bits). */
+
+
+
+/* Defines conditions which we can ask for */
+
+typedef
+ enum {
+ ARM64CondEQ = 0, /* equal : Z=1 */
+ ARM64CondNE = 1, /* not equal : Z=0 */
+
+ ARM64CondCS = 2, /* >=u (higher or same) (aka HS) : C=1 */
+ ARM64CondCC = 3, /* <u (lower) (aka LO) : C=0 */
+
+ ARM64CondMI = 4, /* minus (negative) : N=1 */
+ ARM64CondPL = 5, /* plus (zero or +ve) : N=0 */
+
+ ARM64CondVS = 6, /* overflow : V=1 */
+ ARM64CondVC = 7, /* no overflow : V=0 */
+
+ ARM64CondHI = 8, /* >u (higher) : C=1 && Z=0 */
+ ARM64CondLS = 9, /* <=u (lower or same) : C=0 || Z=1 */
+
+ ARM64CondGE = 10, /* >=s (signed greater or equal) : N=V */
+ ARM64CondLT = 11, /* <s (signed less than) : N!=V */
+
+ ARM64CondGT = 12, /* >s (signed greater) : Z=0 && N=V */
+ ARM64CondLE = 13, /* <=s (signed less or equal) : Z=1 || N!=V */
+
+ ARM64CondAL = 14, /* always (unconditional) : 1 */
+ ARM64CondNV = 15 /* always (unconditional) : 1 */
+ }
+ ARM64Condcode;
+
+#endif /* ndef __VEX_GUEST_ARM64_DEFS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end guest_arm64_defs.h ---*/
+/*---------------------------------------------------------------*/
Added: trunk/priv/guest_arm64_helpers.c
==============================================================================
--- trunk/priv/guest_arm64_helpers.c (added)
+++ trunk/priv/guest_arm64_helpers.c Sun Jan 12 12:49:10 2014
@@ -0,0 +1,1292 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_arm64_helpers.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2013-2013 OpenWorks
+ in...@op...
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_emnote.h"
+#include "libvex_guest_arm64.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+
+#include "main_util.h"
+#include "main_globals.h"
+#include "guest_generic_bb_to_IR.h"
+#include "guest_arm64_defs.h"
+
+
+/* This file contains helper functions for arm guest code. Calls to
+ these functions are generated by the back end. These calls are of
+ course in the host machine code and this file will be compiled to
+ host machine code, so that all makes sense.
+
+ Only change the signatures of these helper functions very
+ carefully. If you change the signature here, you'll have to change
+ the parameters passed to it in the IR calls constructed by
+ guest_arm64_toIR.c.
+*/
+
+
+/* Set to 1 to get detailed profiling info about individual N, Z, C
+ and V flag evaluation. */
+#define PROFILE_NZCV_FLAGS 0
+
+#if PROFILE_NZCV_FLAGS
+
+static UInt tab_eval[ARM64G_CC_OP_NUMBER][16];
+static UInt initted = 0;
+static UInt tot_evals = 0;
+
+static void initCounts ( void )
+{
+ UInt i, j;
+ for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
+ for (j = 0; j < 16; j++) {
+ tab_eval[i][j] = 0;
+ }
+ }
+ initted = 1;
+}
+
+static void showCounts ( void )
+{
+ const HChar* nameCC[16]
+ = { "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC",
+ "HI", "LS", "GE", "LT", "GT", "LE", "AL", "NV" };
+ UInt i, j;
+ ULong sum = 0;
+ vex_printf("\nCC_OP 0 1 2 3 "
+ " 4 5 6\n");
+ vex_printf( "--------------------------------------------------"
+ "--------------------------\n");
+ for (j = 0; j < 16; j++) {
+ vex_printf("%2d %s ", j, nameCC[j]);
+ for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
+ vex_printf("%9d ", tab_eval[i][j]);
+ sum += tab_eval[i][j];
+ }
+ vex_printf("\n");
+ }
+ vex_printf("(In total %llu calls)\n", sum);
+}
+
+#define NOTE_EVAL(_cc_op, _cond) \
+ do { \
+ if (!initted) initCounts(); \
+ vassert( ((UInt)(_cc_op)) < ARM64G_CC_OP_NUMBER); \
+ vassert( ((UInt)(_cond)) < 16); \
+      tab_eval[(UInt)(_cc_op)][(UInt)(_cond)]++;              \
+ tot_evals++; \
+ if (0 == (tot_evals & 0x7FFF)) \
+ showCounts(); \
+ } while (0)
+
+#endif /* PROFILE_NZCV_FLAGS */
+
+
+/* Calculate the N flag from the supplied thunk components, in the
+ least significant bit of the word. Returned bits 63:1 are zero. */
+static
+ULong arm64g_calculate_flag_n ( ULong cc_op, ULong cc_dep1,
+ ULong cc_dep2, ULong cc_dep3 )
+{
+ switch (cc_op) {
+ case ARM64G_CC_OP_COPY: {
+ /* (nzcv:28x0, unused, unused) */
+ ULong nf = (cc_dep1 >> ARM64G_CC_SHIFT_N) & 1;
+ return nf;
+ }
+ case ARM64G_CC_OP_ADD32: {
+ /* (argL, argR, unused) */
+ UInt argL = (UInt)cc_dep1;
+ UInt argR = (UInt)cc_dep2;
+ UInt res = argL + argR;
+ ULong nf = (ULong)(res >> 31);
+ return nf;
+ }
+ case ARM64G_CC_OP_ADD64: {
+ /* (argL, argR, unused) */
+ ULong argL = cc_dep1;
+ ULong argR = cc_dep2;
+ ULong res = argL + argR;
+ ULong nf = (ULong)(res >> 63);
+ return nf;
+ }
+ case ARM64G_CC_OP_SUB32: {
+ /* (argL, argR, unused) */
+ UInt argL = (UInt)cc_dep1;
+ UInt argR = (UInt)cc_dep2;
+ UInt res = argL - argR;
+ ULong nf = (ULong)(res >> 31);
+ return nf;
+ }
+ case ARM64G_CC_OP_SUB64: {
+ /* (argL, argR, unused) */
+ ULong argL = cc_dep1;
+ ULong argR = cc_dep2;
+ ULong res = argL - argR;
+ ULong nf = res >> 63;
+ return nf;
+ }
+//ZZ case ARMG_CC_OP_ADC: {
+//ZZ /* (argL, argR, oldC) */
+//ZZ UInt argL = cc_dep1;
+//ZZ UInt argR = cc_dep2;
+//ZZ UInt oldC = cc_dep3;
+//ZZ vassert((oldC & ~1) == 0);
+//ZZ UInt res = argL + argR + oldC;
+//ZZ UInt nf = res >> 31;
+//ZZ return nf;
+//ZZ }
+//ZZ case ARMG_CC_OP_SBB: {
+//ZZ /* (argL, argR, oldC) */
+//ZZ UInt argL = cc_dep1;
+//ZZ UInt argR = cc_dep2;
+//ZZ UInt oldC = cc_dep3;
+//ZZ vassert((oldC & ~1) == 0);
+//ZZ UInt res = argL - argR - (oldC ^ 1);
+//ZZ UInt nf = res >> 31;
+//ZZ return nf;
+//ZZ }
+ case ARM64G_CC_OP_LOGIC32: {
+ /* (res, unused, unused) */
+ UInt res = (UInt)cc_dep1;
+ ULong nf = res >> 31;
+ return nf;
+ }
+ case ARM64G_CC_OP_LOGIC64: {
+ /* (res, unused, unused) */
+ ULong res = cc_dep1;
+ ULong nf = res >> 63;
+ return nf;
+ }
+//ZZ case ARMG_CC_OP_MUL: {
+//ZZ /* (res, unused, oldC:oldV) */
+//ZZ UInt res = cc_dep1;
+//ZZ UInt nf = res >> 31;
+//ZZ return nf;
+//ZZ }
+//ZZ case ARMG_CC_OP_MULL: {
+//ZZ /* (resLo32, resHi32, oldC:oldV) */
+//ZZ UInt resHi32 = cc_dep2;
+//ZZ UInt nf = resHi32 >> 31;
+//ZZ return nf;
+//ZZ }
+ default:
+ /* shouldn't really make these calls from generated code */
+ vex_printf("arm64g_calculate_flag_n"
+ "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
+ cc_op, cc_dep1, cc_dep2, cc_dep3 );
+ vpanic("arm64g_calculate_flag_n");
+ }
+}
+
+
+/* Calculate the Z flag from the supplied thunk components, in the
+ least significant bit of the word. Returned bits 63:1 are zero. */
+static
+ULong arm64g_calculate_flag_z ( ULong cc_op, ULong cc_dep1,
+ ULong cc_dep2, ULong cc_dep3 )
+{
+ switch (cc_op) {
+ case ARM64G_CC_OP_COPY: {
+ /* (nzcv:28x0, unused, unused) */
+ ULong zf = (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1;
+ return zf;
+ }
+ case ARM64G_CC_OP_ADD32: {
+ /* (argL, argR, unused) */
+ UInt argL = (UInt)cc_dep1;
+ UInt argR = (UInt)cc_dep2;
+ UInt res = argL + argR;
+ ULong zf = res == 0;
+ return zf;
+ }
+ case ARM64G_CC_OP_ADD64: {
+ /* (argL, argR, unused) */
+ ULong argL = cc_dep1;
+ ULong argR = cc_dep2;
+ ULong res = argL + argR;
+ ULong zf = res == 0;
+ return zf;
+ }
+ case ARM64G_CC_OP_SUB32: {
+ /* (argL, argR, unused) */
+ UInt argL = (UInt)cc_dep1;
+ UInt argR = (UInt)cc_dep2;
+ UInt res = argL - argR;
+ ULong zf = res == 0;
+ return zf;
+ }
+ case ARM64G_CC_OP_SUB64: {
+ /* (argL, argR, unused) */
+ ULong argL = cc_dep1;
+ ULong argR = cc_dep2;
+ ULong res = argL - argR;
+ ULong zf = res == 0;
+ return zf;
+ }
+//ZZ case ARMG_CC_OP_ADC: {
+//ZZ /* (argL, argR, oldC) */
+//ZZ UInt argL = cc_dep1;
+//ZZ UInt argR = cc_dep2;
+//ZZ UInt oldC = cc_dep3;
+//ZZ vassert((oldC & ~1) == 0);
+//ZZ UInt res = argL + argR + oldC;
+//ZZ UInt zf = res == 0;
+//ZZ return zf;
+//ZZ }
+//ZZ case ARMG_CC_OP_SBB: {
+//ZZ /* (argL, argR, oldC) */
+//ZZ UInt argL = cc_dep1;
+//ZZ UInt argR = cc_dep2;
+//ZZ UInt oldC = cc_dep3;
+//ZZ vassert((oldC & ~1) == 0);
+//ZZ UInt res = argL - argR - (oldC ^ 1);
+//ZZ UInt zf = res == 0;
+//ZZ return zf;
+//ZZ }
+ case ARM64G_CC_OP_LOGIC32: {
+ /* (res, unused, unused) */
+ UInt res = (UInt)cc_dep1;
+ ULong zf = res == 0;
+ return zf;
+ }
+ case ARM64G_CC_OP_LOGIC64: {
+ /* (res, unused, unused) */
+ ULong res = cc_dep1;
+ ULong zf = res == 0;
+ return zf;
+ }
+//ZZ case ARMG_CC_OP_MUL: {
+//ZZ /* (res, unused, oldC:oldV) */
+//ZZ UInt res = cc_dep1;
+//ZZ UInt zf = res == 0;
+//ZZ return zf;
+//ZZ }
+//ZZ case ARMG_CC_OP_MULL: {
+//ZZ /* (resLo32, resHi32, oldC:oldV) */
+//ZZ UInt resLo32 = cc_dep1;
+//ZZ UInt resHi32 = cc_dep2;
+//ZZ UInt zf = (resHi32|resLo32) == 0;
+//ZZ return zf;
+//ZZ }
+ default:
+ /* shouldn't really make these calls from generated code */
+ vex_printf("arm64g_calculate_flag_z"
+ "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
+ cc_op, cc_dep1, cc_dep2, cc_dep3 );
+ vpanic("arm64g_calculate_flag_z");
+ }
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate the C flag from the supplied thunk components, in the
+ least significant bit of the word. Returned bits 63:1 are zero. */
+static
+ULong arm64g_calculate_flag_c ( ULong cc_op, ULong cc_dep1,
+ ULong cc_dep2, ULong cc_dep3 )
+{
+ switch (cc_op) {
+ case ARM64G_CC_OP_COPY: {
+ /* (nzcv:28x0, unused, unused) */
+ ULong cf = (cc_dep1 >> ARM64G_CC_SHIFT_C) & 1;
+ return cf;
+ }
+ case ARM64G_CC_OP_ADD32: {
+ /* (argL, argR, unused) */
+ UInt argL = (UInt)cc_dep1;
+ UInt argR = (UInt)cc_dep2;
+ UInt res = argL + argR;
+ ULong cf = res < argL;
+ return cf;
+ }
+ case ARM64G_CC_OP_ADD64: {
+ /* (argL, argR, unused) */
+ ULong argL = cc_dep1;
+ ULong argR = cc_dep2;
+ ULong res = argL + argR;
+ ULong cf = res < argL;
+ return cf;
+ }
+ case ARM64G_CC_OP_SUB32: {
+ /* (argL, argR, unused) */
+ UInt argL = (UInt)cc_dep1;
+ UInt argR = (UInt)cc_dep2;
+ ULong cf = argL >= argR;
+ return cf;
+ }
+ case ARM64G_CC_OP_SUB64: {
+ /* (argL, argR, unused) */
+ ULong argL = cc_dep1;
+ ULong argR = cc_dep2;
+ ULong cf = argL >= argR;
+ return cf;
+ }
+//ZZ case ARMG_CC_OP_ADC: {
+//ZZ /* (argL, argR, oldC) */
+//ZZ UInt argL = cc_dep1;
+//ZZ UInt argR = cc_dep2;
+//ZZ UInt oldC = cc_dep3;
+//ZZ vassert((oldC & ~1) == 0);
+//ZZ UInt res = argL + argR + oldC;
+//ZZ UInt cf = oldC ? (res <= argL) : (res < argL);
+//ZZ return cf;
+//ZZ }
+//ZZ case ARMG_CC_OP_SBB: {
+//ZZ /* (argL, argR, oldC) */
+//ZZ UInt argL = cc_dep1;
+//ZZ UInt argR = cc_dep2;
+//ZZ UInt oldC = cc_dep3;
+//ZZ vassert((oldC & ~1) == 0);
+//ZZ UInt cf = oldC ? (argL >= argR) : (argL > argR);
+//ZZ return cf;
+//ZZ }
+ case ARM64G_CC_OP_LOGIC32:
+ case ARM64G_CC_OP_LOGIC64: {
+ /* (res, unused, unused) */
+ return 0; // C after logic is zero on arm64
+ }
+//ZZ case ARMG_CC_OP_MUL: {
+//ZZ /* (res, unused, oldC:oldV) */
+//ZZ UInt oldC = (cc_dep3 >> 1) & 1;
+//ZZ vassert((cc_dep3 & ~3) == 0);
+//ZZ UInt cf = oldC;
+//ZZ return cf;
+//ZZ }
+//ZZ case ARMG_CC_OP_MULL: {
+//ZZ /* (resLo32, resHi32, oldC:oldV) */
+//ZZ UInt oldC = (cc_dep3 >> 1) & 1;
+//ZZ vassert((cc_dep3 & ~3) == 0);
+//ZZ UInt cf = oldC;
+//ZZ return cf;
+//ZZ }
+ default:
+ /* shouldn't really make these calls from generated code */
+ vex_printf("arm64g_calculate_flag_c"
+ "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
+ cc_op, cc_dep1, cc_dep2, cc_dep3 );
+ vpanic("arm64g_calculate_flag_c");
+ }
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate the V flag from the supplied thunk components, in the
+ least significant bit of the word. Returned bits 63:1 are zero. */
+static
+ULong arm64g_calculate_flag_v ( ULong cc_op, ULong cc_dep1,
+ ULong cc_dep2, ULong cc_dep3 )
+{
+ switch (cc_op) {
+ case ARM64G_CC_OP_COPY: {
+ /* (nzcv:28x0, unused, unused) */
+ ULong vf = (cc_dep1 >> ARM64G_CC_SHIFT_V) & 1;
+ return vf;
+ }
+ case ARM64G_CC_OP_ADD32: {
+ /* (argL, argR, unused) */
+ UInt argL = (UInt)cc_dep1;
+ UInt argR = (UInt)cc_dep2;
+ UInt res = argL + argR;
+ ULong vf = (ULong)(((res ^ argL) & (res ^ argR)) >> 31);
+ return vf;
+ }
+ case ARM64G_CC_OP_ADD64: {
+ /* (argL, argR, unused) */
+ ULong argL = cc_dep1;
+ ULong argR = cc_dep2;
+ ULong res = argL + argR;
+ ULong vf = ((res ^ argL) & (res ^ argR)) >> 63;
+ return vf;
+ }
+ case ARM64G_CC_OP_SUB32: {
+ /* (argL, argR, unused) */
+ UInt argL = (UInt)cc_dep1;
+ UInt argR = (UInt)cc_dep2;
+ UInt res = argL - argR;
+ ULong vf = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31);
+ return vf;
+ }
+ case ARM64G_CC_OP_SUB64: {
+ /* (argL, argR, unused) */
+ ULong argL = cc_dep1;
+ ULong argR = cc_dep2;
+ ULong res = argL - argR;
+ ULong vf = (((argL ^ argR) & (argL ^ res))) >> 63;
+ return vf;
+ }
+//ZZ case ARMG_CC_OP_ADC: {
+//ZZ /* (argL, argR, oldC) */
+//ZZ UInt argL = cc_dep1;
+//ZZ UInt argR = cc_dep2;
+//ZZ UInt oldC = cc_dep3;
+//ZZ vassert((oldC & ~1) == 0);
+//ZZ UInt res = argL + argR + oldC;
+//ZZ UInt vf = ((res ^ argL) & (res ^ argR)) >> 31;
+//ZZ return vf;
+//ZZ }
+//ZZ case ARMG_CC_OP_SBB: {
+//ZZ /* (argL, argR, oldC) */
+//ZZ UInt argL = cc_dep1;
+//ZZ UInt argR = cc_dep2;
+//ZZ UInt oldC = cc_dep3;
+//ZZ vassert((oldC & ~1) == 0);
+//ZZ UInt res = argL - argR - (oldC ^ 1);
+//ZZ UInt vf = ((argL ^ argR) & (argL ^ res)) >> 31;
+//ZZ return vf;
+//ZZ }
+ case ARM64G_CC_OP_LOGIC32:
+ case ARM64G_CC_OP_LOGIC64: {
+ /* (res, unused, unused) */
+ return 0; // V after logic is zero on arm64
+ }
+//ZZ case ARMG_CC_OP_MUL: {
+//ZZ /* (res, unused, oldC:oldV) */
+//ZZ UInt oldV = (cc_dep3 >> 0) & 1;
+//ZZ vassert((cc_dep3 & ~3) == 0);
+//ZZ UInt vf = oldV;
+//ZZ return vf;
+//ZZ }
+//ZZ case ARMG_CC_OP_MULL: {
+//ZZ /* (resLo32, resHi32, oldC:oldV) */
+//ZZ UInt oldV = (cc_dep3 >> 0) & 1;
+//ZZ vassert((cc_dep3 & ~3) == 0);
+//ZZ UInt vf = oldV;
+//ZZ return vf;
+//ZZ }
+ default:
+ /* shouldn't really make these calls from generated code */
+ vex_printf("arm64g_calculate_flag_v"
+ "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
+ cc_op, cc_dep1, cc_dep2, cc_dep3 );
+ vpanic("arm64g_calculate_flag_v");
+ }
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate NZCV from the supplied thunk components, in the positions
+ they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
+   Returned bits 63:32 and 27:0 are zero. */
+ULong arm64g_calculate_flags_nzcv ( ULong cc_op, ULong cc_dep1,
+ ULong cc_dep2, ULong cc_dep3 )
+{
+ ULong f;
+ ULong res = 0;
+ f = 1 & arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ res |= (f << ARM64G_CC_SHIFT_N);
+ f = 1 & arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ res |= (f << ARM64G_CC_SHIFT_Z);
+ f = 1 & arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ res |= (f << ARM64G_CC_SHIFT_C);
+ f = 1 & arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ res |= (f << ARM64G_CC_SHIFT_V);
+ return res;
+}
+
+//ZZ
+//ZZ /* CALLED FROM GENERATED CODE: CLEAN HELPER */
+//ZZ /* Calculate the QC flag from the arguments, in the lowest bit
+//ZZ of the word (bit 0). Urr, having this out of line is bizarre.
+//ZZ Push back inline. */
+//ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
+//ZZ UInt resR1, UInt resR2 )
+//ZZ {
+//ZZ if (resL1 != resR1 || resL2 != resR2)
+//ZZ return 1;
+//ZZ else
+//ZZ return 0;
+//ZZ }
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate the specified condition from the thunk components, in the
+ lowest bit of the word (bit 0). Returned bits 63:1 are zero. */
+ULong arm64g_calculate_condition ( /* ARM64Condcode << 4 | cc_op */
+ ULong cond_n_op ,
+ ULong cc_dep1,
+ ULong cc_dep2, ULong cc_dep3 )
+{
+ ULong cond = cond_n_op >> 4;
+ ULong cc_op = cond_n_op & 0xF;
+ ULong inv = cond & 1;
+ ULong nf, zf, vf, cf;
+
+# if PROFILE_NZCV_FLAGS
+ NOTE_EVAL(cc_op, cond);
+# endif
+
+ // vex_printf("XXXXXXXX %llx %llx %llx %llx\n",
+ // cond_n_op, cc_dep1, cc_dep2, cc_dep3);
+
+ switch (cond) {
+ case ARM64CondEQ: // Z=1 => z
+ case ARM64CondNE: // Z=0
+ zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ return inv ^ zf;
+
+ case ARM64CondCS: // C=1 => c
+ case ARM64CondCC: // C=0
+ cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ return inv ^ cf;
+
+ case ARM64CondMI: // N=1 => n
+ case ARM64CondPL: // N=0
+ nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ return inv ^ nf;
+
+ case ARM64CondVS: // V=1 => v
+ case ARM64CondVC: // V=0
+ vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ return inv ^ vf;
+
+ case ARM64CondHI: // C=1 && Z=0 => c & ~z
+ case ARM64CondLS: // C=0 || Z=1
+ cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ return inv ^ (1 & (cf & ~zf));
+
+ case ARM64CondGE: // N=V => ~(n^v)
+ case ARM64CondLT: // N!=V
+ nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ return inv ^ (1 & ~(nf ^ vf));
+
+ case ARM64CondGT: // Z=0 && N=V => ~z & ~(n^v) => ~(z | (n^v))
+ case ARM64CondLE: // Z=1 || N!=V
+ nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ return inv ^ (1 & ~(zf | (nf ^ vf)));
+
+ case ARM64CondAL: // 1
+ case ARM64CondNV: // 1
+ return 1;
+
+ default:
+ /* shouldn't really make these calls from generated code */
+ vex_printf("arm64g_calculate_condition(ARM64)"
+ "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
+ cond, cc_op, cc_dep1, cc_dep2, cc_dep3 );
+      vpanic("arm64g_calculate_condition(ARM64)");
+ }
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Flag-helpers translation-time function specialisers. ---*/
+/*--- These help iropt specialise calls to the above run-time ---*/
+/*--- flags functions. ---*/
+/*---------------------------------------------------------------*/
+
+/* Used by the optimiser to try specialisations. Returns an
+ equivalent expression, or NULL if none. */
+
+static Bool isU64 ( IRExpr* e, ULong n )
+{
+ return
+ toBool( e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U64
+ && e->Iex.Const.con->Ico.U64 == n );
+}
+
+/* Translation-time specialiser for calls to the ARM64 flag helper
+   functions.  Given the helper's name and its argument expressions
+   (plus up to n_precedingStmts of context), returns an equivalent,
+   cheaper IRExpr, or NULL if no specialisation applies. */
+IRExpr* guest_arm64_spechelper ( const HChar* function_name,
+                                 IRExpr** args,
+                                 IRStmt** precedingStmts,
+                                 Int      n_precedingStmts )
+{
+# define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
+# define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
+# define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
+# define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
+
+   /* args[] is NULL-terminated; count the arguments. */
+   Int i, arity = 0;
+   for (i = 0; args[i]; i++)
+      arity++;
+//ZZ # if 0
+//ZZ    vex_printf("spec request:\n");
+//ZZ    vex_printf("   %s  ", function_name);
+//ZZ    for (i = 0; i < arity; i++) {
+//ZZ       vex_printf("  ");
+//ZZ       ppIRExpr(args[i]);
+//ZZ    }
+//ZZ    vex_printf("\n");
+//ZZ # endif
+
+   /* --------- specialising "arm64g_calculate_condition" --------- */
+
+   if (vex_streq(function_name, "arm64g_calculate_condition")) {
+
+      /* specialise calls to the "arm64g_calculate_condition" function.
+         Not sure whether this is strictly necessary, but: the
+         replacement IR must produce only the values 0 or 1.  Bits
+         63:1 are required to be zero. */
+      IRExpr *cond_n_op, *cc_dep1, *cc_dep2 ; //, *cc_ndep;
+      vassert(arity == 4);
+      cond_n_op = args[0]; /* (ARM64Condcode << 4)  |  ARM64G_CC_OP_* */
+      cc_dep1   = args[1];
+      cc_dep2   = args[2];
+      //cc_ndep   = args[3];
+
+      /*---------------- SUB64 ----------------*/
+
+      /* 0, 1 */
+      if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64)) {
+         /* EQ after SUB --> test argL == argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
+      }
+      if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB64)) {
+         /* NE after SUB --> test argL != argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpNE64, cc_dep1, cc_dep2));
+      }
+
+      /* 2, 3 */
+      if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB64)) {
+         /* CS after SUB --> test argL >=u argR
+                         --> test argR <=u argL */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
+      }
+      if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB64)) {
+         /* CC after SUB --> test argL <u argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
+      }
+
+      /* 8, 9 */
+      if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB64)) {
+         /* LS after SUB --> test argL <=u argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
+      }
+      if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB64)) {
+         /* HI after SUB --> test argL >u argR
+                         --> test argR <u argL */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
+      }
+
+      /* 10, 11 */
+      if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB64)) {
+         /* LT after SUB --> test argL <s argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
+      }
+      if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB64)) {
+         /* GE after SUB --> test argL >=s argR
+                         --> test argR <=s argL */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
+      }
+
+      /* 12, 13 */
+      if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB64)) {
+         /* GT after SUB --> test argL >s argR
+                         --> test argR <s argL */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
+      }
+      if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB64)) {
+         /* LE after SUB --> test argL <=s argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
+      }
+
+      /*---------------- SUB32 ----------------*/
+
+      /* The 32-bit cases are identical in structure to the 64-bit
+         ones above, but narrow both operands with Iop_64to32 before
+         comparing. */
+
+      /* 0, 1 */
+      if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB32)) {
+         /* EQ after SUB --> test argL == argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
+                                        unop(Iop_64to32, cc_dep2)));
+      }
+      if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB32)) {
+         /* NE after SUB --> test argL != argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
+                                        unop(Iop_64to32, cc_dep2)));
+      }
+
+      /* 2, 3 */
+      if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB32)) {
+         /* CS after SUB --> test argL >=u argR
+                         --> test argR <=u argL */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep2),
+                                         unop(Iop_64to32, cc_dep1)));
+      }
+      if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB32)) {
+         /* CC after SUB --> test argL <u argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep1),
+                                         unop(Iop_64to32, cc_dep2)));
+      }
+
+      /* 8, 9 */
+      if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB32)) {
+         /* LS after SUB --> test argL <=u argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep1),
+                                         unop(Iop_64to32, cc_dep2)));
+      }
+      if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB32)) {
+         /* HI after SUB --> test argL >u argR
+                         --> test argR <u argL */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep2),
+                                         unop(Iop_64to32, cc_dep1)));
+      }
+
+      /* 10, 11 */
+      if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB32)) {
+         /* LT after SUB --> test argL <s argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep1),
+                                         unop(Iop_64to32, cc_dep2)));
+      }
+      if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB32)) {
+         /* GE after SUB --> test argL >=s argR
+                         --> test argR <=s argL */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep2),
+                                         unop(Iop_64to32, cc_dep1)));
+      }
+
+      /* 12, 13 */
+      if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB32)) {
+         /* GT after SUB --> test argL >s argR
+                         --> test argR <s argL */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep2),
+                                         unop(Iop_64to32, cc_dep1)));
+      }
+      if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB32)) {
+         /* LE after SUB --> test argL <=s argR */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep1),
+                                         unop(Iop_64to32, cc_dep2)));
+      }
+
+//ZZ       /*---------------- SBB ----------------*/
+//ZZ
+//ZZ       if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SBB)) {
+//ZZ          /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
+//ZZ          /* thunk is:  (dep1=argL, dep2=argR, ndep=oldC) */
+//ZZ          /* HS after SBB (same as C after SBB below)
+//ZZ             --> oldC ? (argL >=u argR) : (argL >u argR)
+//ZZ             --> oldC ? (argR <=u argL) : (argR <u argL)
+//ZZ          */
+//ZZ          return
+//ZZ             IRExpr_ITE(
+//ZZ                binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
+//ZZ                /* case oldC != 0 */
+//ZZ                unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
+//ZZ                /* case oldC == 0 */
+//ZZ                unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
+//ZZ             );
+//ZZ       }
+//ZZ
+//ZZ       /*---------------- LOGIC ----------------*/
+//ZZ
+//ZZ       if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_LOGIC)) {
+//ZZ          /* EQ after LOGIC --> test res == 0 */
+//ZZ          return unop(Iop_1Uto32,
+//ZZ                      binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
+//ZZ       }
+//ZZ       if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_LOGIC)) {
+//ZZ          /* NE after LOGIC --> test res != 0 */
+//ZZ          return unop(Iop_1Uto32,
+//ZZ                      binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
+//ZZ       }
+//ZZ
+//ZZ       if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_LOGIC)) {
+//ZZ          /* PL after LOGIC --> test (res >> 31) == 0 */
+//ZZ          return unop(Iop_1Uto32,
+//ZZ                      binop(Iop_CmpEQ32,
+//ZZ                            binop(Iop_Shr32, cc_dep1, mkU8(31)),
+//ZZ                            mkU32(0)));
+//ZZ       }
+//ZZ       if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_LOGIC)) {
+//ZZ          /* MI after LOGIC --> test (res >> 31) == 1 */
+//ZZ          return unop(Iop_1Uto32,
+//ZZ                      binop(Iop_CmpEQ32,
+//ZZ                            binop(Iop_Shr32, cc_dep1, mkU8(31)),
+//ZZ                            mkU32(1)));
+//ZZ       }
+
+      /*---------------- COPY ----------------*/
+
+      /* For OP_COPY, cc_dep1 holds the flag bits themselves, so the
+         condition reduces to extracting the relevant bit. */
+
+      if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_COPY)) {
+         /* EQ after COPY --> (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1 */
+         return binop(Iop_And64,
+                      binop(Iop_Shr64, cc_dep1,
+                            mkU8(ARM64G_CC_SHIFT_Z)),
+                      mkU64(1));
+      }
+      if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_COPY)) {
+         /* NE after COPY --> ((cc_dep1 >> ARM64G_CC_SHIFT_Z) ^ 1) & 1 */
+         return binop(Iop_And64,
+                      binop(Iop_Xor64,
+                            binop(Iop_Shr64, cc_dep1,
+                                  mkU8(ARM64G_CC_SHIFT_Z)),
+                            mkU64(1)),
+                      mkU64(1));
+      }
+
+//ZZ       /*----------------- AL -----------------*/
+//ZZ
+//ZZ       /* A critically important case for Thumb code.
+//ZZ
+//ZZ          What we're trying to spot is the case where cond_n_op is an
+//ZZ          expression of the form Or32(..., 0xE0) since that means the
+//ZZ          caller is asking for CondAL and we can simply return 1
+//ZZ          without caring what the ... part is.  This is a potentially
+//ZZ          dodgy kludge in that it assumes that the ... part has zeroes
+//ZZ          in bits 7:4, so that the result of the Or32 is guaranteed to
+//ZZ          be 0xE in bits 7:4.  Given that the places where this first
+//ZZ          arg are constructed (in guest_arm_toIR.c) are very
+//ZZ          constrained, we can get away with this.  To make this
+//ZZ          guaranteed safe would require to have a new primop, Slice44
+//ZZ          or some such, thusly
+//ZZ
+//ZZ          Slice44(arg1, arg2) = 0--(24)--0 arg1[7:4] arg2[3:0]
+//ZZ
+//ZZ          and we would then look for Slice44(0xE0, ...)
+//ZZ          which would give the required safety property.
+//ZZ
+//ZZ          It would be infeasibly expensive to scan backwards through
+//ZZ          the entire block looking for an assignment to the temp, so
+//ZZ          just look at the previous 16 statements.  That should find it
+//ZZ          if it is an interesting case, as a result of how the
+//ZZ          boilerplate guff at the start of each Thumb insn translation
+//ZZ          is made.
+//ZZ       */
+//ZZ       if (cond_n_op->tag == Iex_RdTmp) {
+//ZZ          Int    j;
+//ZZ          IRTemp look_for = cond_n_op->Iex.RdTmp.tmp;
+//ZZ          Int    limit    = n_precedingStmts - 16;
+//ZZ          if (limit < 0) limit = 0;
+//ZZ          if (0) vex_printf("scanning %d .. %d\n", n_precedingStmts-1, limit);
+//ZZ          for (j = n_precedingStmts - 1; j >= limit; j--) {
+//ZZ             IRStmt* st = precedingStmts[j];
+//ZZ             if (st->tag == Ist_WrTmp
+//ZZ                 && st->Ist.WrTmp.tmp == look_for
+//ZZ                 && st->Ist.WrTmp.data->tag == Iex_Binop
+//ZZ                 && st->Ist.WrTmp.data->Iex.Binop.op == Iop_Or32
+//ZZ                 && isU32(st->Ist.WrTmp.data->Iex.Binop.arg2, (ARMCondAL << 4)))
+//ZZ                return mkU32(1);
+//ZZ          }
+//ZZ          /* Didn't find any useful binding to the first arg
+//ZZ             in the previous 16 stmts. */
+//ZZ       }
+   }
+
+//ZZ    /* --------- specialising "armg_calculate_flag_c" --------- */
+//ZZ
+//ZZ    else
+//ZZ    if (vex_streq(function_name, "armg_calculate_flag_c")) {
+//ZZ
+//ZZ       /* specialise calls to the "armg_calculate_flag_c" function.
+//ZZ          Note that the returned value must be either 0 or 1; nonzero
+//ZZ          bits 31:1 are not allowed.  In turn, incoming oldV and oldC
+//ZZ          values (from the thunk) are assumed to have bits 31:1
+//ZZ          clear. */
+//ZZ       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
+//ZZ       vassert(arity == 4);
+//ZZ       cc_op   = args[0]; /* ARMG_CC_OP_* */
+//ZZ       cc_dep1 = args[1];
+//ZZ       cc_dep2 = args[2];
+//ZZ       cc_ndep = args[3];
+//ZZ
+//ZZ       if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
+//ZZ          /* Thunk args are (result, shco, oldV) */
+//ZZ          /* C after LOGIC --> shco */
+//ZZ          return cc_dep2;
+//ZZ       }
+//ZZ
+//ZZ       if (isU32(cc_op, ARMG_CC_OP_SUB)) {
+//ZZ          /* Thunk args are (argL, argR, unused) */
+//ZZ          /* C after SUB --> argL >=u argR
+//ZZ                        --> argR <=u argL */
+//ZZ          return unop(Iop_1Uto32,
+//ZZ                      binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
+//ZZ       }
+//ZZ
+//ZZ       if (isU32(cc_op, ARMG_CC_OP_SBB)) {
+//ZZ          /* This happens occasionally in softfloat code, eg __divdf3+140 */
+//ZZ          /* thunk is:  (dep1=argL, dep2=argR, ndep=oldC) */
+//ZZ          /* C after SBB (same as HS after SBB above)
+//ZZ             --> oldC ? (argL >=u argR) : (argL >u argR)
+//ZZ             --> oldC ? (argR <=u argL) : (argR <u argL)
+//ZZ          */
+//ZZ          return
+//ZZ             IRExpr_ITE(
+//ZZ                binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
+//ZZ                /* case oldC != 0 */
+//ZZ                unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
+//ZZ                /* case oldC == 0 */
+//ZZ                unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
+//ZZ             );
+//ZZ       }
+//ZZ
+//ZZ    }
+//ZZ
+//ZZ    /* --------- specialising "armg_calculate_flag_v" --------- */
+//ZZ
+//ZZ    else
+//ZZ    if (vex_streq(function_name, "armg_calculate_flag_v")) {
+//ZZ
+//ZZ       /* specialise calls to the "armg_calculate_flag_v" function.
+//ZZ          Note that the returned value must be either 0 or 1; nonzero
+//ZZ          bits 31:1 are not allowed.  In turn, incoming oldV and oldC
+//ZZ          values (from the thunk) are assumed to have bits 31:1
+//ZZ          clear. */
+//ZZ       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
+//ZZ       vassert(arity == 4);
+//ZZ       cc_op   = args[0]; /* ARMG_CC_OP_* */
+//ZZ       cc_dep1 = args[1];
+//ZZ       cc_dep2 = args[2];
+//ZZ       cc_ndep = args[3];
+//ZZ
+//ZZ       if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
+//ZZ          /* Thunk args are (result, shco, oldV) */
+//ZZ          /* V after LOGIC --> oldV */
+//ZZ          return cc_ndep;
+//ZZ       }
+//ZZ
+//ZZ       if (isU32(cc_op, ARMG_CC_OP_SUB)) {
+//ZZ          /* Thunk args are (argL, argR, unused) */
+//ZZ          /* V after SUB
+//ZZ             --> let res = argL - argR
+//ZZ                 in ((argL ^ argR) & (argL ^ res)) >> 31
+//ZZ             --> ((argL ^ argR) & (argL ^ (argL - argR))) >> 31
+//ZZ          */
+//ZZ          IRExpr* argL = cc_dep1;
+//ZZ          IRExpr* argR = cc_dep2;
+//ZZ          return
+//ZZ             binop(Iop_Shr32,
+//ZZ                   binop(Iop_And32,
+//ZZ                         binop(Iop_Xor32, argL, argR),
+//ZZ                         binop(Iop_Xor32, argL, binop(Iop_Sub32, argL, argR))
+//ZZ                   ),
+//ZZ                   mkU8(31)
+//ZZ             );
+//ZZ       }
+//ZZ
+//ZZ       if (isU32(cc_op, ARMG_CC_OP_SBB)) {
+//ZZ          /* This happens occasionally in softfloat code, eg __divdf3+140 */
+//ZZ          /* thunk is:  (dep1=argL, dep2=argR, ndep=oldC) */
+//ZZ          /* V after SBB
+//ZZ             --> let res = argL - argR - (oldC ^ 1)
+//ZZ                 in  (argL ^ argR) & (argL ^ res) & 1
+//ZZ          */
+//ZZ          return
+//ZZ             binop(
+//ZZ                Iop_And32,
+//ZZ                binop(
+//ZZ                   Iop_And32,
+//ZZ                   // argL ^ argR
+//ZZ                   binop(Iop_Xor32, cc_dep1, cc_dep2),
+//ZZ                   // argL ^ (argL - argR - (oldC ^ 1))
+//ZZ                   binop(Iop_Xor32,
+//ZZ                         cc_dep1,
+//ZZ                         binop(Iop_Sub32,
+//ZZ                               binop(Iop_Sub32, cc_dep1, cc_dep2),
+//ZZ                               binop(Iop_Xor32, cc_ndep, mkU32(1)))
+//ZZ                   )
+//ZZ                ),
+//ZZ                mkU32(1)
+//ZZ             );
+//ZZ       }
+//ZZ
+//ZZ    }
+
+# undef unop
+# undef binop
+# undef mkU64
+# undef mkU8
+
+   /* No specialisation was found. */
+   return NULL;
+}
+
+
+/*----------------------------------------------*/
+/*--- The exported fns .. ---*/
+/*----------------------------------------------*/
+
+//ZZ /* VISIBLE TO LIBVEX CLIENT */
+//ZZ #if 0
+//ZZ void LibVEX_GuestARM_put_flags ( UInt flags_native,
+//ZZ /*OUT*/VexGuestARMState* vex_state )
+//ZZ {
+//ZZ vassert(0); // FIXME
+//ZZ
+//ZZ /* Mask out everything except N Z V C. */
+//ZZ flags_native
+//ZZ &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C);
+//ZZ
+//ZZ vex_state->guest_CC_OP = ARMG_CC_OP_COPY;
+//ZZ vex_state->guest_CC_DEP1 = flags_native;
+//ZZ vex_state->guest_CC_DEP2 = 0;
+//ZZ vex_state->guest_CC_NDEP = 0;
+//ZZ }
+//ZZ #endif
+
+/* VISIBLE TO LIBVEX CLIENT */
+/* Reconstruct the architectural NZCV value (flags in bits 31:28)
+   from the guest state's flag thunk (CC_OP/CC_DEP1/CC_DEP2/CC_NDEP). */
+ULong LibVEX_GuestARM64_get_nzcv ( /*IN*/const VexGuestARM64State* vex_state )
+{
+   ULong nzcv = 0;
+   // NZCV
+   nzcv |= arm64g_calculate_flags_nzcv(
+               vex_state->guest_CC_OP,
+               vex_state->guest_CC_DEP1,
+               vex_state->guest_CC_DEP2,
+               vex_state->guest_CC_NDEP
+            );
+   /* Only bits 31:28 (N,Z,C,V) may be set; all other bits of the
+      computed value must be zero. */
+   vassert(0 == (nzcv & 0xFFFFFFFF0FFFFFFFULL));
+//ZZ    // Q
+//ZZ    if (vex_state->guest_QFLAG32 > 0)
+//ZZ       cpsr |= (1 << 27);
+//ZZ    // GE
+//ZZ    if (vex_state->guest_GEFLAG0 > 0)
+//ZZ       cpsr |= (1 << 16);
+//ZZ    if (vex_state->guest_GEFLAG1 > 0)
+//ZZ       cpsr |= (1 << 17);
+//ZZ    if (vex_state->guest_GEFLAG2 > 0)
+//ZZ       cpsr |= (1 << 18);
+//ZZ    if (vex_state->guest_GEFLAG3 > 0)
+//ZZ       cpsr |= (1 << 19);
+//ZZ    // M
+//ZZ    cpsr |= (1 << 4); // 0b10000 means user-mode
+//ZZ    // J,T   J (bit 24) is zero by initialisation above
+//ZZ    // T  we copy from R15T[0]
+//ZZ    if (vex_state->guest_R15T & 1)
+//ZZ       cpsr |= (1 << 5);
+//ZZ    // ITSTATE we punt on for the time being.  Could compute it
+//ZZ    // if needed though.
+//ZZ    // E, endianness, 0 (littleendian) from initialisation above
+//ZZ    // A,I,F disable some async exceptions.  Not sure about these.
+//ZZ    // Leave as zero for the time being.
+   return nzcv;
+}
+
+/* VISIBLE TO LIBVEX CLIENT */
+/* Set up an ARM64 guest state: zero everything, then set the few
+   fields that need a non-zero initial value. */
+void LibVEX_GuestARM64_initialise ( /*OUT*/VexGuestARM64State* vex_state )
+{
+   /* Zero the whole state first; the explicit assignments below are
+      the only deviations from all-zeroes. */
+   vex_bzero(vex_state, sizeof(*vex_state));
+//ZZ    vex_state->host_EvC_FAILADDR = 0;
+//ZZ    vex_state->host_EvC_COUNTER = 0;
+//ZZ
+//ZZ    vex_state->guest_R0  = 0;
+//ZZ    vex_state->guest_R1  = 0;
+//ZZ    vex_state->guest_R2  = 0;
+//ZZ    vex_state->guest_R3  = 0;
+//ZZ    vex_state->guest_R4  = 0;
+//ZZ    vex_state->guest_R5  = 0;
+//ZZ    vex_state->guest_R6  = 0;
+//ZZ    vex_state->guest_R7  = 0;
+//ZZ    vex_state->guest_R8  = 0;
+//ZZ    vex_state->guest_R9  = 0;
+//ZZ    vex_state->guest_R10 = 0;
+//ZZ    vex_state->guest_R11 = 0;
+//ZZ    vex_state->guest_R12 = 0;
+//ZZ    vex_state->guest_R13 = 0;
+//ZZ    vex_state->guest_R14 = 0;
+//ZZ    vex_state->guest_R15T = 0;  /* NB: implies ARM mode */
+//ZZ
+   /* Flags thunk: flag bits are a direct copy of (all-zero) DEP1. */
+   vex_state->guest_CC_OP   = ARM64G_CC_OP_COPY;
+//ZZ    vex_state->guest_CC_DEP1 = 0;
+//ZZ    vex_state->guest_CC_DEP2 = 0;
+//ZZ    vex_state->guest_CC_NDEP = 0;
+//ZZ    vex_state->guest_QFLAG32 = 0;
+//ZZ    vex_state->guest_GEFLAG0 = 0;
+//ZZ    vex_state->guest_GEFLAG1 = 0;
+//ZZ    vex_state->guest_GEFLAG2 = 0;
+//ZZ    vex_state->guest_GEFLAG3 = 0;
+//ZZ
+//ZZ    vex_state->guest_EMNOTE  = EmNote_NONE;
+//ZZ    vex_state->guest_TISTART = 0;
+//ZZ    vex_state->guest_TILEN   = 0;
+//ZZ    vex_state->guest_NRADDR  = 0;
+//ZZ    vex_state->guest_IP_AT_SYSCALL = 0;
+//ZZ
+//ZZ    vex_state->guest_D0  = 0;
+//ZZ    vex_state->guest_D1  = 0;
+//ZZ    vex_state->guest_D2  = 0;
+//ZZ    vex_state->guest_D3  = 0;
+//ZZ    vex_state->guest_D4  = 0;
+//ZZ    vex_state->guest_D5  = 0;
+//ZZ    vex_state->guest_D6  = 0;
+//ZZ    vex_state->guest_D7  = 0;
+//ZZ    vex_state->guest_D8  = 0;
+//ZZ    vex_state->guest_D9  = 0;
+//ZZ    vex_state->guest_D10 = 0;
+//ZZ    vex_state->guest_D11 = 0;
+//ZZ    vex_state->guest_D12 = 0;
+//ZZ    vex_state->guest_D13 = 0;
+//ZZ    vex_state->guest_D14 = 0;
+//ZZ    vex_state->guest_D15 = 0;
+//ZZ    vex_state->guest_D16 = 0;
+//ZZ    vex_state->guest_D17 = 0;
+//ZZ    vex_state->guest_D18 = 0;
+//ZZ    vex_state->guest_D19 = 0;
+//ZZ    vex_state->guest_D20 = 0;
+//ZZ    vex_state->guest_D21 = 0;
+//ZZ    vex_state->guest_D22 = 0;
+//ZZ    vex_state->guest_D23 = 0;
+//ZZ    vex_state->guest_D24 = 0;
+//ZZ    vex_state->guest_D25 = 0;
+//ZZ    vex_state->guest_D26 = 0;
+//ZZ    vex_state->guest_D27 = 0;
+//ZZ    vex_state->guest_D28 = 0;
+//ZZ    vex_state->guest_D29 = 0;
+//ZZ    vex_state->guest_D30 = 0;
+//ZZ    vex_state->guest_D31 = 0;
+//ZZ
+//ZZ    /* ARM encoded; zero is the default as it happens (result flags
+//ZZ       (NZCV) cleared, FZ disabled, round to nearest, non-vector mode,
+//ZZ       all exns masked, all exn sticky bits cleared). */
+//ZZ    vex_state->guest_FPSCR = 0;
+//ZZ
+//ZZ    vex_state->guest_TPIDRURO = 0;
+//ZZ
+//ZZ    /* Not in a Thumb IT block. */
+//ZZ    vex_state->guest_ITSTATE = 0;
+//ZZ
+//ZZ    vex_state->padding1 = 0;
+//ZZ    vex_state->padding2 = 0;
+//ZZ    vex_state->padding3 = 0;
+//ZZ    vex_state->padding4 = 0;
+//ZZ    vex_state->padding5 = 0;
+}
+
+
+/*-----------------------------------------------------------*/
+/*--- Describing the arm guest state, for the benefit ---*/
+/*--- of iropt and instrumenters. ---*/
+/*-----------------------------------------------------------*/
+
+/* Figure out if any part of the guest state contained in minoff
+ .. maxoff requires precise memory exceptions. If in doubt return
+ True (but this generates significantly slower code).
+
+ We enforce precise exns for guest SP, PC, 29(FP), 30(LR).
+ That might be overkill (for 29 and 30); I don't know.
+*/
+Bool guest_arm64_state_requires_precise_mem_exns ( Int minoff,
+ Int maxoff)
+{
+ Int sp_min = offsetof(VexGuestARM64State, guest_SP);
+ Int sp_max = sp_min + 8 - 1...
[truncated message content] |