|
From: <sv...@va...> - 2015-04-01 18:50:07
|
Author: sewardj
Date: Wed Apr 1 19:49:55 2015
New Revision: 3114
Log:
Catchup merge with trunk: merge in all trunk changes up to and
including r3113.
Added:
branches/NCODE/priv/multiarch_main_main.c
- copied unchanged from r3113, trunk/priv/multiarch_main_main.c
Modified:
branches/NCODE/ (props changed)
branches/NCODE/priv/guest_amd64_defs.h
branches/NCODE/priv/guest_amd64_helpers.c
branches/NCODE/priv/guest_amd64_toIR.c
branches/NCODE/priv/guest_arm64_defs.h
branches/NCODE/priv/guest_arm64_helpers.c
branches/NCODE/priv/guest_arm64_toIR.c
branches/NCODE/priv/guest_arm_defs.h
branches/NCODE/priv/guest_arm_helpers.c
branches/NCODE/priv/guest_generic_bb_to_IR.c
branches/NCODE/priv/guest_generic_bb_to_IR.h
branches/NCODE/priv/guest_mips_defs.h
branches/NCODE/priv/guest_mips_helpers.c
branches/NCODE/priv/guest_mips_toIR.c
branches/NCODE/priv/guest_ppc_defs.h
branches/NCODE/priv/guest_ppc_helpers.c
branches/NCODE/priv/guest_ppc_toIR.c
branches/NCODE/priv/guest_s390_defs.h
branches/NCODE/priv/guest_s390_helpers.c
branches/NCODE/priv/guest_s390_toIR.c
branches/NCODE/priv/guest_x86_defs.h
branches/NCODE/priv/guest_x86_helpers.c
branches/NCODE/priv/guest_x86_toIR.c
branches/NCODE/priv/host_amd64_defs.c
branches/NCODE/priv/host_amd64_defs.h
branches/NCODE/priv/host_amd64_isel.c
branches/NCODE/priv/host_arm64_defs.c
branches/NCODE/priv/host_arm64_defs.h
branches/NCODE/priv/host_arm64_isel.c
branches/NCODE/priv/host_arm_defs.c
branches/NCODE/priv/host_arm_defs.h
branches/NCODE/priv/host_arm_isel.c
branches/NCODE/priv/host_generic_reg_alloc2.c
branches/NCODE/priv/host_generic_regs.c
branches/NCODE/priv/host_generic_regs.h
branches/NCODE/priv/host_generic_simd128.c
branches/NCODE/priv/host_mips_defs.c
branches/NCODE/priv/host_mips_defs.h
branches/NCODE/priv/host_mips_isel.c
branches/NCODE/priv/host_ppc_defs.c
branches/NCODE/priv/host_ppc_defs.h
branches/NCODE/priv/host_ppc_isel.c
branches/NCODE/priv/host_s390_defs.c
branches/NCODE/priv/host_s390_defs.h
branches/NCODE/priv/host_s390_isel.c
branches/NCODE/priv/host_x86_defs.c
branches/NCODE/priv/host_x86_defs.h
branches/NCODE/priv/host_x86_isel.c
branches/NCODE/priv/ir_defs.c
branches/NCODE/priv/ir_opt.c
branches/NCODE/priv/ir_opt.h
branches/NCODE/priv/main_main.c
branches/NCODE/priv/main_util.c
branches/NCODE/priv/main_util.h
branches/NCODE/priv/s390_disasm.c
branches/NCODE/pub/libvex.h
branches/NCODE/pub/libvex_ir.h
branches/NCODE/useful/Makefile-vex
branches/NCODE/useful/test_main.c
Modified: branches/NCODE/priv/guest_amd64_defs.h
==============================================================================
--- branches/NCODE/priv/guest_amd64_defs.h (original)
+++ branches/NCODE/priv/guest_amd64_defs.h Wed Apr 1 19:49:55 2015
@@ -74,7 +74,8 @@
precise memory exceptions. This is logically part of the guest
state description. */
extern
-Bool guest_amd64_state_requires_precise_mem_exns ( Int, Int );
+Bool guest_amd64_state_requires_precise_mem_exns ( Int, Int,
+ VexRegisterUpdates );
extern
VexGuestLayout amd64guest_layout;
Modified: branches/NCODE/priv/guest_amd64_helpers.c
==============================================================================
--- branches/NCODE/priv/guest_amd64_helpers.c (original)
+++ branches/NCODE/priv/guest_amd64_helpers.c Wed Apr 1 19:49:55 2015
@@ -151,7 +151,7 @@
static inline Long lshift ( Long x, Int n )
{
if (n >= 0)
- return x << n;
+ return (ULong)x << n;
else
return x >> (-n);
}
@@ -190,8 +190,8 @@
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
- Long argL, argR, res; \
+ { ULong cf, pf, af, zf, sf, of; \
+ ULong argL, argR, res; \
argL = CC_DEP1; \
argR = CC_DEP2; \
res = argL + argR; \
@@ -211,8 +211,8 @@
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
- Long argL, argR, res; \
+ { ULong cf, pf, af, zf, sf, of; \
+ ULong argL, argR, res; \
argL = CC_DEP1; \
argR = CC_DEP2; \
res = argL - argR; \
@@ -232,8 +232,8 @@
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
- Long argL, argR, oldC, res; \
+ { ULong cf, pf, af, zf, sf, of; \
+ ULong argL, argR, oldC, res; \
oldC = CC_NDEP & AMD64G_CC_MASK_C; \
argL = CC_DEP1; \
argR = CC_DEP2 ^ oldC; \
@@ -257,8 +257,8 @@
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
- Long argL, argR, oldC, res; \
+ { ULong cf, pf, af, zf, sf, of; \
+ ULong argL, argR, oldC, res; \
oldC = CC_NDEP & AMD64G_CC_MASK_C; \
argL = CC_DEP1; \
argR = CC_DEP2 ^ oldC; \
@@ -282,7 +282,7 @@
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
+ { ULong cf, pf, af, zf, sf, of; \
cf = 0; \
pf = parity_table[(UChar)CC_DEP1]; \
af = 0; \
@@ -298,8 +298,8 @@
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
- Long argL, argR, res; \
+ { ULong cf, pf, af, zf, sf, of; \
+ ULong argL, argR, res; \
res = CC_DEP1; \
argL = res - 1; \
argR = 1; \
@@ -318,8 +318,8 @@
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
- Long argL, argR, res; \
+ { ULong cf, pf, af, zf, sf, of; \
+ ULong argL, argR, res; \
res = CC_DEP1; \
argL = res + 1; \
argR = 1; \
@@ -339,7 +339,7 @@
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
+ { ULong cf, pf, af, zf, sf, of; \
cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \
pf = parity_table[(UChar)CC_DEP1]; \
af = 0; /* undefined */ \
@@ -357,7 +357,7 @@
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
+ { ULong cf, pf, af, zf, sf, of; \
cf = CC_DEP2 & 1; \
pf = parity_table[(UChar)CC_DEP1]; \
af = 0; /* undefined */ \
@@ -377,7 +377,7 @@
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long fl \
+ { ULong fl \
= (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
| (AMD64G_CC_MASK_C & CC_DEP1) \
| (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
@@ -394,7 +394,7 @@
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long fl \
+ { ULong fl \
= (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
| (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
| (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
@@ -410,7 +410,7 @@
DATA_U2TYPE, NARROWto2U) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
+ { ULong cf, pf, af, zf, sf, of; \
DATA_UTYPE hi; \
DATA_UTYPE lo \
= NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
@@ -436,11 +436,11 @@
DATA_S2TYPE, NARROWto2S) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
+ { ULong cf, pf, af, zf, sf, of; \
DATA_STYPE hi; \
DATA_STYPE lo \
- = NARROWtoS( ((DATA_STYPE)CC_DEP1) \
- * ((DATA_STYPE)CC_DEP2) ); \
+ = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1) \
+ * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) ); \
DATA_S2TYPE rr \
= NARROWto2S( \
((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
@@ -461,7 +461,7 @@
#define ACTIONS_UMULQ \
{ \
PREAMBLE(64); \
- { Long cf, pf, af, zf, sf, of; \
+ { ULong cf, pf, af, zf, sf, of; \
ULong lo, hi; \
mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \
cf = (hi != 0); \
@@ -479,7 +479,7 @@
#define ACTIONS_SMULQ \
{ \
PREAMBLE(64); \
- { Long cf, pf, af, zf, sf, of; \
+ { ULong cf, pf, af, zf, sf, of; \
Long lo, hi; \
mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \
cf = (hi != (lo >>/*s*/ (64-1))); \
@@ -497,7 +497,7 @@
#define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
+ { ULong cf, pf, af, zf, sf, of; \
cf = 0; \
pf = 0; \
af = 0; \
@@ -513,7 +513,7 @@
#define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
+ { ULong cf, pf, af, zf, sf, of; \
cf = ((DATA_UTYPE)CC_DEP2 != 0); \
pf = 0; \
af = 0; \
@@ -545,7 +545,7 @@
#define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
- { Long cf, pf, af, zf, sf, of; \
+ { ULong cf, pf, af, zf, sf, of; \
cf = ((DATA_UTYPE)CC_DEP2 == 0); \
pf = 0; \
af = 0; \
@@ -1288,6 +1288,7 @@
/*---------------- SUBW ----------------*/
+ /* 4, 5 */
if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
/* word sub/cmp, then Z --> test dst==src */
return unop(Iop_1Uto64,
@@ -1303,6 +1304,7 @@
unop(Iop_64to16,cc_dep2)));
}
+ /* 6, */
if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) {
/* word sub/cmp, then BE (unsigned less than or equal)
--> test dst <=u src */
@@ -1312,6 +1314,7 @@
binop(Iop_Shl64, cc_dep2, mkU8(48))));
}
+ /* 14, */
if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
/* word sub/cmp, then LE (signed less than or equal)
--> test dst <=s src */
@@ -1324,6 +1327,26 @@
/*---------------- SUBB ----------------*/
+ /* 2, 3 */
+ if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) {
+ /* byte sub/cmp, then B (unsigned less than)
+ --> test dst <u src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLT64U,
+ binop(Iop_And64, cc_dep1, mkU64(0xFF)),
+ binop(Iop_And64, cc_dep2, mkU64(0xFF))));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) {
+ /* byte sub/cmp, then NB (unsigned greater than or equal)
+ --> test src <=u dst */
+ /* Note, args are opposite way round from the usual */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLE64U,
+ binop(Iop_And64, cc_dep2, mkU64(0xFF)),
+ binop(Iop_And64, cc_dep1, mkU64(0xFF))));
+ }
+
+ /* 4, 5 */
if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
/* byte sub/cmp, then Z --> test dst==src */
return unop(Iop_1Uto64,
@@ -1339,6 +1362,7 @@
unop(Iop_64to8,cc_dep2)));
}
+ /* 6, */
if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
/* byte sub/cmp, then BE (unsigned less than or equal)
--> test dst <=u src */
@@ -1348,6 +1372,7 @@
binop(Iop_And64, cc_dep2, mkU64(0xFF))));
}
+ /* 8, 9 */
if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
&& isU64(cc_dep2, 0)) {
/* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
@@ -3963,8 +3988,9 @@
Only %RSP is needed in mode VexRegUpdSpAtMemAccess.
*/
-Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff,
- Int maxoff)
+Bool guest_amd64_state_requires_precise_mem_exns (
+ Int minoff, Int maxoff, VexRegisterUpdates pxControl
+ )
{
Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
Int rbp_max = rbp_min + 8 - 1;
@@ -3975,7 +4001,7 @@
if (maxoff < rsp_min || minoff > rsp_max) {
/* no overlap with rsp */
- if (vex_control.iropt_register_updates == VexRegUpdSpAtMemAccess)
+ if (pxControl == VexRegUpdSpAtMemAccess)
return False; // We only need to check stack pointer.
} else {
return True;
Modified: branches/NCODE/priv/guest_amd64_toIR.c
==============================================================================
--- branches/NCODE/priv/guest_amd64_toIR.c (original)
+++ branches/NCODE/priv/guest_amd64_toIR.c Wed Apr 1 19:49:55 2015
@@ -474,17 +474,17 @@
static ULong extend_s_8to64 ( UChar x )
{
- return (ULong)((((Long)x) << 56) >> 56);
+ return (ULong)((Long)(((ULong)x) << 56) >> 56);
}
static ULong extend_s_16to64 ( UShort x )
{
- return (ULong)((((Long)x) << 48) >> 48);
+ return (ULong)((Long)(((ULong)x) << 48) >> 48);
}
static ULong extend_s_32to64 ( UInt x )
{
- return (ULong)((((Long)x) << 32) >> 32);
+ return (ULong)((Long)(((ULong)x) << 32) >> 32);
}
/* Figure out whether the mod and rm parts of a modRM byte refer to a
@@ -27235,10 +27235,11 @@
}
-/* Masked load. */
-static ULong dis_VMASKMOV_load ( Bool *uses_vvvv, const VexAbiInfo* vbi,
- Prefix pfx, Long delta,
- const HChar* opname, Bool isYMM, IRType ty )
+/* Masked load or masked store. */
+static ULong dis_VMASKMOV ( Bool *uses_vvvv, const VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ const HChar* opname, Bool isYMM, IRType ty,
+ Bool isLoad )
{
HChar dis_buf[50];
Int alen, i;
@@ -27246,51 +27247,59 @@
UChar modrm = getUChar(delta);
UInt rG = gregOfRexRM(pfx,modrm);
UInt rV = getVexNvvvv(pfx);
- IRTemp res[8], cond;
+
addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
- if (isYMM) {
+ delta += alen;
+
+ /**/ if (isLoad && isYMM) {
DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
- } else {
+ }
+ else if (isLoad && !isYMM) {
DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
}
- delta += alen;
- for (i = 0; i < 2 * (isYMM ? 2 : 1) * (ty == Ity_I32 ? 2 : 1); i++) {
- res[i] = newTemp(ty);
- cond = newTemp(Ity_I1);
- assign( cond,
- binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
- ty == Ity_I32 ? getYMMRegLane32( rV, i )
- : getYMMRegLane64( rV, i ),
- mkU(ty, 0) ));
- assign( res[i],
- IRExpr_ITE(
- mkexpr(cond),
- loadLE(ty, IRExpr_ITE(
- mkexpr(cond),
- binop(Iop_Add64, mkexpr(addr),
- mkU64(i*(ty == Ity_I32 ? 4 : 8))),
- getIReg64(R_RSP)
- )
- ),
- mkU(ty, 0)
- )
- );
+ else if (!isLoad && isYMM) {
+ DIP("%s %s,%s,%s\n", opname, nameYMMReg(rG), nameYMMReg(rV), dis_buf );
}
- switch (ty) {
- case Ity_I32:
- for (i = 0; i < 8; i++)
- putYMMRegLane32( rG, i, (i < 4 || isYMM)
- ? mkexpr(res[i]) : mkU32(0) );
- break;
- case Ity_I64:
- for (i = 0; i < 4; i++)
- putYMMRegLane64( rG, i, (i < 2 || isYMM)
- ? mkexpr(res[i]) : mkU64(0) );
- break;
- default: vassert(0);
+ else {
+ vassert(!isLoad && !isYMM);
+ DIP("%s %s,%s,%s\n", opname, nameXMMReg(rG), nameXMMReg(rV), dis_buf );
}
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ Bool laneIs32 = ty == Ity_I32;
+
+ Int nLanes = (isYMM ? 2 : 1) * (laneIs32 ? 4 : 2);
+
+ for (i = 0; i < nLanes; i++) {
+ IRExpr* shAmt = laneIs32 ? mkU8(31) : mkU8(63);
+ IRExpr* one = laneIs32 ? mkU32(1) : mkU64(1);
+ IROp opSHR = laneIs32 ? Iop_Shr32 : Iop_Shr64;
+ IROp opEQ = laneIs32 ? Iop_CmpEQ32 : Iop_CmpEQ64;
+ IRExpr* lane = (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rV, i );
+
+ IRTemp cond = newTemp(Ity_I1);
+ assign(cond, binop(opEQ, binop(opSHR, lane, shAmt), one));
+
+ IRTemp data = newTemp(ty);
+ IRExpr* ea = binop(Iop_Add64, mkexpr(addr),
+ mkU64(i * (laneIs32 ? 4 : 8)));
+ if (isLoad) {
+ stmt(
+ IRStmt_LoadG(
+ Iend_LE, laneIs32 ? ILGop_Ident32 : ILGop_Ident64,
+ data, ea, laneIs32 ? mkU32(0) : mkU64(0), mkexpr(cond)
+ ));
+ (laneIs32 ? putYMMRegLane32 : putYMMRegLane64)( rG, i, mkexpr(data) );
+ } else {
+ assign(data, (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rG, i ));
+ stmt( IRStmt_StoreG(Iend_LE, ea, mkexpr(data), mkexpr(cond)) );
+ }
+ }
+
+ if (isLoad && !isYMM)
+ putYMMRegLane128( rG, 1, mkV128(0) );
+
*uses_vvvv = True;
return delta;
}
@@ -28200,35 +28209,77 @@
break;
case 0x2C:
- /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 2C /r */
+ /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
+ && 0==getRexW(pfx)/*W0*/
&& !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
- /*!isYMM*/False, Ity_I32 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
+ /*!isYMM*/False, Ity_I32, /*isLoad*/True );
goto decode_success;
}
- /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 2C /r */
+ /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
+ && 0==getRexW(pfx)/*W0*/
&& !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
- /*isYMM*/True, Ity_I32 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
+ /*isYMM*/True, Ity_I32, /*isLoad*/True );
goto decode_success;
}
break;
case 0x2D:
- /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 2D /r */
+ /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */
+ if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
+ && 0==getRexW(pfx)/*W0*/
+ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
+ /*!isYMM*/False, Ity_I64, /*isLoad*/True );
+ goto decode_success;
+ }
+ /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */
+ if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
+ && 0==getRexW(pfx)/*W0*/
+ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
+ /*isYMM*/True, Ity_I64, /*isLoad*/True );
+ goto decode_success;
+ }
+ break;
+
+ case 0x2E:
+ /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */
+ if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
+ && 0==getRexW(pfx)/*W0*/
+ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
+ /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
+ goto decode_success;
+ }
+ /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */
+ if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
+ && 0==getRexW(pfx)/*W0*/
+ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
+ /*isYMM*/True, Ity_I32, /*!isLoad*/False );
+ goto decode_success;
+ }
+ break;
+
+ case 0x2F:
+ /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
+ && 0==getRexW(pfx)/*W0*/
&& !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
- /*!isYMM*/False, Ity_I64 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
+ /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
goto decode_success;
}
- /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 2D /r */
+ /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
+ && 0==getRexW(pfx)/*W0*/
&& !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
- /*isYMM*/True, Ity_I64 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
+ /*isYMM*/True, Ity_I64, /*!isLoad*/False );
goto decode_success;
}
break;
@@ -28789,29 +28840,60 @@
/* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
&& 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
- /*!isYMM*/False, Ity_I32 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
+ /*!isYMM*/False, Ity_I32, /*isLoad*/True );
goto decode_success;
}
/* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
&& 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
- /*isYMM*/True, Ity_I32 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
+ /*isYMM*/True, Ity_I32, /*isLoad*/True );
goto decode_success;
}
/* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
&& 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
- /*!isYMM*/False, Ity_I64 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
+ /*!isYMM*/False, Ity_I64, /*isLoad*/True );
goto decode_success;
}
/* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
&& 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
- /*isYMM*/True, Ity_I64 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
+ /*isYMM*/True, Ity_I64, /*isLoad*/True );
+ goto decode_success;
+ }
+ break;
+
+ case 0x8E:
+ /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */
+ if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
+ && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
+ /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
+ goto decode_success;
+ }
+ /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */
+ if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
+ && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
+ /*isYMM*/True, Ity_I32, /*!isLoad*/False );
+ goto decode_success;
+ }
+ /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */
+ if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
+ && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
+ /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
+ goto decode_success;
+ }
+ /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */
+ if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
+ && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
+ /*isYMM*/True, Ity_I64, /*!isLoad*/False );
goto decode_success;
}
break;
Modified: branches/NCODE/priv/guest_arm64_defs.h
==============================================================================
--- branches/NCODE/priv/guest_arm64_defs.h (original)
+++ branches/NCODE/priv/guest_arm64_defs.h Wed Apr 1 19:49:55 2015
@@ -64,7 +64,8 @@
precise memory exceptions. This is logically part of the guest
state description. */
extern
-Bool guest_arm64_state_requires_precise_mem_exns ( Int, Int );
+Bool guest_arm64_state_requires_precise_mem_exns ( Int, Int,
+ VexRegisterUpdates );
extern
VexGuestLayout arm64Guest_layout;
Modified: branches/NCODE/priv/guest_arm64_helpers.c
==============================================================================
--- branches/NCODE/priv/guest_arm64_helpers.c (original)
+++ branches/NCODE/priv/guest_arm64_helpers.c Wed Apr 1 19:49:55 2015
@@ -1317,8 +1317,9 @@
We enforce precise exns for guest SP, PC, 29(FP), 30(LR).
That might be overkill (for 29 and 30); I don't know.
*/
-Bool guest_arm64_state_requires_precise_mem_exns ( Int minoff,
- Int maxoff)
+Bool guest_arm64_state_requires_precise_mem_exns (
+ Int minoff, Int maxoff, VexRegisterUpdates pxControl
+ )
{
Int xsp_min = offsetof(VexGuestARM64State, guest_XSP);
Int xsp_max = xsp_min + 8 - 1;
@@ -1327,7 +1328,7 @@
if (maxoff < xsp_min || minoff > xsp_max) {
/* no overlap with xsp */
- if (vex_control.iropt_register_updates == VexRegUpdSpAtMemAccess)
+ if (pxControl == VexRegUpdSpAtMemAccess)
return False; // We only need to check stack pointer.
} else {
return True;
Modified: branches/NCODE/priv/guest_arm64_toIR.c
==============================================================================
--- branches/NCODE/priv/guest_arm64_toIR.c (original)
+++ branches/NCODE/priv/guest_arm64_toIR.c Wed Apr 1 19:49:55 2015
@@ -39,10 +39,15 @@
Both should be fixed. They behave incorrectly in the presence of
NaNs.
+ FMULX is treated the same as FMUL. That's also not correct.
+
* Floating multiply-add (etc) insns. Are split into a multiply and
an add, and so suffer double rounding and hence sometimes the
least significant mantissa bit is incorrect. Fix: use the IR
multiply-add IROps instead.
+
+ * FRINTA, FRINTN are kludged .. they just round to nearest. No special
+ handling for the "ties" case. FRINTX might be dubious too.
*/
/* "Special" instructions.
@@ -1411,7 +1416,7 @@
UInt laneSzB = 0;
switch (laneTy) {
case Ity_I8: laneSzB = 1; break;
- case Ity_I16: laneSzB = 2; break;
+ case Ity_F16: case Ity_I16: laneSzB = 2; break;
case Ity_F32: case Ity_I32: laneSzB = 4; break;
case Ity_F64: case Ity_I64: laneSzB = 8; break;
case Ity_V128: laneSzB = 16; break;
@@ -1431,7 +1436,7 @@
Int off = offsetQRegLane(qregNo, ty, 0);
switch (ty) {
case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
- case Ity_F32: case Ity_F64: case Ity_V128:
+ case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
break;
default:
vassert(0); // Other cases are probably invalid
@@ -1445,7 +1450,7 @@
Int off = offsetQRegLane(qregNo, ty, 0);
switch (ty) {
case Ity_I8:
- case Ity_I16:
+ case Ity_F16: case Ity_I16:
case Ity_I32: case Ity_I64:
case Ity_F32: case Ity_F64: case Ity_V128:
break;
@@ -1532,7 +1537,7 @@
switch (laneTy) {
case Ity_F64: case Ity_I64:
case Ity_I32: case Ity_F32:
- case Ity_I16:
+ case Ity_I16: case Ity_F16:
case Ity_I8:
break;
default:
@@ -1547,7 +1552,7 @@
Int off = offsetQRegLane(qregNo, laneTy, laneNo);
switch (laneTy) {
case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
- case Ity_F64: case Ity_F32:
+ case Ity_F64: case Ity_F32: case Ity_F16:
break;
default:
vassert(0); // Other cases are ATC
@@ -7292,6 +7297,7 @@
assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
return res;
}
+ case Iop_Max32Fx4: case Iop_Min32Fx4:
case Iop_Min32Sx4: case Iop_Min32Ux4:
case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
IRTemp x3210 = src;
@@ -8480,7 +8486,7 @@
: mkexpr(tN1));
IRTemp res = math_FOLDV(tN2, op);
if (res == IRTemp_INVALID)
- return False; /* means math_MINMAXV
+ return False; /* means math_FOLDV
doesn't handle this case yet */
putQReg128(dd, mkexpr(res));
const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
@@ -8491,6 +8497,26 @@
return True;
}
+ if ((size == X00 || size == X10)
+ && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
+ /* -------- 0,00,01100: FMAXMNV s_4s -------- */
+ /* -------- 0,00,01100: FMAXNMV s_4s -------- */
+ /* -------- 0,10,01100: FMINNMV s_4s -------- */
+ /* -------- 1,10,01111: FMINV s_4s -------- */
+ /* FMAXNM, FMINNM: FIXME -- KLUDGED */
+ if (bitQ == 0) return False; // Only 4s is allowed
+ Bool isMIN = (size & 2) == 2;
+ Bool isNM = opcode == BITS5(0,1,1,0,0);
+ IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
+ IRTemp src = newTempV128();
+ assign(src, getQReg128(nn));
+ IRTemp res = math_FOLDV(src, opMXX);
+ putQReg128(dd, mkexpr(res));
+ DIP("%s%sv s%u, %u.4s\n",
+ isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
+ return True;
+ }
+
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
return False;
# undef INSN
@@ -8849,7 +8875,9 @@
case BITS5(0,1,1,1,0):
ok = True; isMOV = True; break;
- /* FMOV (vector, immediate, single precision) */
+ /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
+ case BITS5(0,1,1,1,1): // 0:1111
+ ok = True; isFMOV = True; break;
/* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
/* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
@@ -8887,7 +8915,7 @@
case BITS5(1,1,1,1,0):
ok = True; isMOV = True; break;
- /* -------- 1,1,1111 FMOV (vector, immediate) -------- */
+ /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
case BITS5(1,1,1,1,1): // 1:1111
ok = bitQ == 1; isFMOV = True; break;
@@ -9054,6 +9082,33 @@
return True;
}
+ if (bitU == 1
+ && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
+ /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
+ /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
+ /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
+ /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
+ /* FMAXNM, FMINNM: FIXME -- KLUDGED */
+ Bool isD = (sz & 1) == 1;
+ Bool isMIN = (sz & 2) == 2;
+ Bool isNM = opcode == BITS5(0,1,1,0,0);
+ IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
+ IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
+ IRTemp src = newTempV128();
+ IRTemp argL = newTempV128();
+ IRTemp argR = newTempV128();
+ assign(src, getQReg128(nn));
+ assign(argL, unop(opZHI, mkexpr(src)));
+ assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
+ mkU8(isD ? 8 : 4))));
+ putQReg128(dd, unop(opZHI,
+ binop(opMXX, mkexpr(argL), mkexpr(argR))));
+ HChar c = isD ? 'd' : 's';
+ DIP("%s%sp %c%u, v%u.2%c\n",
+ isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
+ return True;
+ }
+
return False;
# undef INSN
}
@@ -9579,6 +9634,89 @@
return True;
}
+ if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
+ /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
+ // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
+ IRType ity = size == X01 ? Ity_F64 : Ity_F32;
+ IRTemp res = newTemp(ity);
+ assign(res, triop(mkMULF(ity),
+ mkexpr(mk_get_IR_rounding_mode()),
+ getQRegLO(nn,ity), getQRegLO(mm,ity)));
+ putQReg128(dd, mkV128(0x0000));
+ putQRegLO(dd, mkexpr(res));
+ DIP("fmulx %s, %s, %s\n",
+ nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
+ return True;
+ }
+
+ if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
+ /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
+ /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
+ Bool isD = size == X01;
+ IRType ity = isD ? Ity_F64 : Ity_F32;
+ Bool isGE = bitU == 1;
+ IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
+ : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
+ IRTemp res = newTempV128();
+ assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
+ : binop(opCMP, getQReg128(nn), getQReg128(mm)));
+ putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
+ mkexpr(res))));
+ DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
+ nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
+ return True;
+ }
+
+ if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
+ /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
+ Bool isD = size == X11;
+ IRType ity = isD ? Ity_F64 : Ity_F32;
+ IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
+ IRTemp res = newTempV128();
+ assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
+ putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
+ mkexpr(res))));
+ DIP("%s %s, %s, %s\n", "fcmgt",
+ nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
+ return True;
+ }
+
+ if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
+ /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
+ /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
+ Bool isD = (size & 1) == 1;
+ IRType ity = isD ? Ity_F64 : Ity_F32;
+ Bool isGT = (size & 2) == 2;
+ IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
+ : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
+ IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
+ IRTemp res = newTempV128();
+ assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
+ unop(opABS, getQReg128(nn)))); // swapd
+ putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
+ mkexpr(res))));
+ DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
+ nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
+ return True;
+ }
+
+ if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
+ /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
+ /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
+ Bool isSQRT = (size & 2) == 2;
+ Bool isD = (size & 1) == 1;
+ IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
+ : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
+ IRTemp res = newTempV128();
+ assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
+ putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
+ mkexpr(res))));
+ HChar c = isD ? 'd' : 's';
+ DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
+ c, dd, c, nn, c, mm);
+ return True;
+ }
+
return False;
# undef INSN
}
@@ -9700,6 +9838,48 @@
return True;
}
+ UInt ix = 0; /*INVALID*/
+ if (size >= X10) {
+ switch (opcode) {
+ case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
+ case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
+ case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
+ default: break;
+ }
+ }
+ if (ix > 0) {
+ /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
+ /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
+ /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
+ /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
+ /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
+ Bool isD = size == X11;
+ IRType ity = isD ? Ity_F64 : Ity_F32;
+ IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
+ IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
+ IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
+ IROp opCmp = Iop_INVALID;
+ Bool swap = False;
+ const HChar* nm = "??";
+ switch (ix) {
+ case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
+ case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
+ case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
+ case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
+ case 5: nm = "fcmle"; opCmp = opCmpLE; break;
+ default: vassert(0);
+ }
+ IRExpr* zero = mkV128(0x0000);
+ IRTemp res = newTempV128();
+ assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
+ : binop(opCmp, getQReg128(nn), zero));
+ putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
+ mkexpr(res))));
+
+ DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
+ return True;
+ }
+
if (opcode == BITS5(1,0,1,0,0)
|| (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
/* -------- 0,xx,10100: SQXTN -------- */
@@ -9737,7 +9917,89 @@
return True;
}
-# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+ ix = 0; /*INVALID*/
+ switch (opcode) {
+ case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
+ case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
+ case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
+ default: break;
+ }
+ if (ix > 0) {
+ /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
+ /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
+ /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
+ /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
+ /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
+ /* -------- 1,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
+ /* -------- 1,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
+ /* -------- 1,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
+ /* -------- 1,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
+ /* -------- 1,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
+ Bool isD = (size & 1) == 1;
+ IRType tyF = isD ? Ity_F64 : Ity_F32;
+ IRType tyI = isD ? Ity_I64 : Ity_I32;
+ IRRoundingMode irrm = 8; /*impossible*/
+ HChar ch = '?';
+ switch (ix) {
+ case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
+ case 2: ch = 'm'; irrm = Irrm_NegINF; break;
+ case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
+ case 4: ch = 'p'; irrm = Irrm_PosINF; break;
+ case 5: ch = 'z'; irrm = Irrm_ZERO; break;
+ default: vassert(0);
+ }
+ IROp cvt = Iop_INVALID;
+ if (bitU == 1) {
+ cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
+ } else {
+ cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
+ }
+ IRTemp src = newTemp(tyF);
+ IRTemp res = newTemp(tyI);
+ assign(src, getQRegLane(nn, 0, tyF));
+ assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
+ putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
+ if (!isD) {
+ putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
+ }
+ putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
+ HChar sOrD = isD ? 'd' : 's';
+ DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
+ sOrD, dd, sOrD, nn);
+ return True;
+ }
+
+ if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
+ /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
+ /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
+ Bool isSQRT = bitU == 1;
+ Bool isD = (size & 1) == 1;
+ IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
+ : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
+ IRTemp resV = newTempV128();
+ assign(resV, unop(op, getQReg128(nn)));
+ putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
+ mkexpr(resV))));
+ HChar c = isD ? 'd' : 's';
+ DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
+ return True;
+ }
+
+ if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
+ /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
+ Bool isD = (size & 1) == 1;
+ IRType ty = isD ? Ity_F64 : Ity_F32;
+ IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
+ IRTemp res = newTemp(ty);
+ IRTemp rm = mk_get_IR_rounding_mode();
+ assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
+ putQReg128(dd, mkV128(0x0000));
+ putQRegLane(dd, 0, mkexpr(res));
+ HChar c = isD ? 'd' : 's';
+ DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
+ return True;
+ }
+
return False;
# undef INSN
}
@@ -9769,6 +10031,70 @@
vassert(size < 4);
vassert(bitH < 2 && bitM < 2 && bitL < 2);
+ if (bitU == 0 && size >= X10
+ && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
+ /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
+ /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
+ Bool isD = (size & 1) == 1;
+ Bool isSUB = opcode == BITS4(0,1,0,1);
+ UInt index;
+ if (!isD) index = (bitH << 1) | bitL;
+ else if (isD && bitL == 0) index = bitH;
+ else return False; // sz:L == x11 => unallocated encoding
+ vassert(index < (isD ? 2 : 4));
+ IRType ity = isD ? Ity_F64 : Ity_F32;
+ IRTemp elem = newTemp(ity);
+ UInt mm = (bitM << 4) | mmLO4;
+ assign(elem, getQRegLane(mm, index, ity));
+ IRTemp dupd = math_DUP_TO_V128(elem, ity);
+ IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
+ IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
+ IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
+ IRTemp rm = mk_get_IR_rounding_mode();
+ IRTemp t1 = newTempV128();
+ IRTemp t2 = newTempV128();
+ // FIXME: double rounding; use FMA primops instead
+ assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
+ assign(t2, triop(isSUB ? opSUB : opADD,
+ mkexpr(rm), getQReg128(dd), mkexpr(t1)));
+ putQReg128(dd,
+ mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
+ mkexpr(t2))));
+ const HChar c = isD ? 'd' : 's';
+ DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
+ c, dd, c, nn, nameQReg128(mm), c, index);
+ return True;
+ }
+
+ if (size >= X10 && opcode == BITS4(1,0,0,1)) {
+ /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */
+ /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
+ Bool isD = (size & 1) == 1;
+ Bool isMULX = bitU == 1;
+ UInt index;
+ if (!isD) index = (bitH << 1) | bitL;
+ else if (isD && bitL == 0) index = bitH;
+ else return False; // sz:L == x11 => unallocated encoding
+ vassert(index < (isD ? 2 : 4));
+ IRType ity = isD ? Ity_F64 : Ity_F32;
+ IRTemp elem = newTemp(ity);
+ UInt mm = (bitM << 4) | mmLO4;
+ assign(elem, getQRegLane(mm, index, ity));
+ IRTemp dupd = math_DUP_TO_V128(elem, ity);
+ IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
+ IRTemp rm = mk_get_IR_rounding_mode();
+ IRTemp t1 = newTempV128();
+ // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
+ assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
+ putQReg128(dd,
+ mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
+ mkexpr(t1))));
+ const HChar c = isD ? 'd' : 's';
+ DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
+ c, dd, c, nn, nameQReg128(mm), c, index);
+ return True;
+ }
+
if (bitU == 0
&& (opcode == BITS4(1,0,1,1)
|| opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
@@ -10993,6 +11319,28 @@
return True;
}
+ if (bitU == 0
+ && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
+ /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ /* FMAXNM, FMINNM: FIXME -- KLUDGED */
+ Bool isD = (size & 1) == 1;
+ if (bitQ == 0 && isD) return False; // implied 1d case
+ Bool isMIN = (size & 2) == 2;
+ Bool isNM = opcode == BITS5(1,1,0,0,0);
+ IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
+ IRTemp res = newTempV128();
+ assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
+ DIP("%s%s %s.%s, %s.%s, %s.%s\n",
+ isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
+ nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+ return True;
+ }
+
if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
/* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
/* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
@@ -11057,9 +11405,12 @@
return True;
}
- if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
- /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
- Bool isD = (size & 1) == 1;
+ if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
+ /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
+ Bool isD = (size & 1) == 1;
+ Bool isMULX = bitU == 0;
if (bitQ == 0 && isD) return False; // implied 1d case
IRTemp rm = mk_get_IR_rounding_mode();
IRTemp t1 = newTempV128();
@@ -11067,7 +11418,7 @@
mkexpr(rm), getQReg128(nn), getQReg128(mm)));
putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
- DIP("fmul %s.%s, %s.%s, %s.%s\n",
+ DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
return True;
}
@@ -11123,6 +11474,37 @@
return True;
}
+ if (bitU == 1
+ && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
+ /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ /* FMAXNM, FMINNM: FIXME -- KLUDGED */
+ Bool isD = (size & 1) == 1;
+ if (bitQ == 0 && isD) return False; // implied 1d case
+ Bool isMIN = (size & 2) == 2;
+ Bool isNM = opcode == BITS5(1,1,0,0,0);
+ IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
+ IRTemp srcN = newTempV128();
+ IRTemp srcM = newTempV128();
+ IRTemp preL = IRTemp_INVALID;
+ IRTemp preR = IRTemp_INVALID;
+ assign(srcN, getQReg128(nn));
+ assign(srcM, getQReg128(mm));
+ math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
+ srcM, srcN, isD, bitQ);
+ putQReg128(
+ dd, math_MAYBE_ZERO_HI64_fromE(
+ bitQ,
+ binop(opMXX, mkexpr(preL), mkexpr(preR))));
+ const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
+ DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
+ isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
+ nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+ return True;
+ }
+
if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
/* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
Bool isD = size == X01;
@@ -11166,6 +11548,23 @@
return True;
}
+ if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
+ /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ Bool isSQRT = (size & 2) == 2;
+ Bool isD = (size & 1) == 1;
+ if (bitQ == 0 && isD) return False; // implied 1d case
+ IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
+ : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
+ IRTemp res = newTempV128();
+ assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
+ DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
+ nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+ return True;
+ }
+
return False;
# undef INSN
}
@@ -11428,6 +11827,48 @@
return True;
}
+ UInt ix = 0; /*INVALID*/
+ if (size >= X10) {
+ switch (opcode) {
+ case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
+ case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
+ case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
+ default: break;
+ }
+ }
+ if (ix > 0) {
+ /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
+ /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
+ /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
+ /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
+ /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
+ if (bitQ == 0 && size == X11) return False; // implied 1d case
+ Bool isD = size == X11;
+ IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
+ IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
+ IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
+ IROp opCmp = Iop_INVALID;
+ Bool swap = False;
+ const HChar* nm = "??";
+ switch (ix) {
+ case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
+ case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
+ case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
+ case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
+ case 5: nm = "fcmle"; opCmp = opCmpLE; break;
+ default: vassert(0);
+ }
+ IRExpr* zero = mkV128(0x0000);
+ IRTemp res = newTempV128();
+ assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
+ : binop(opCmp, getQReg128(nn), zero));
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
+ DIP("%s %s.%s, %s.%s, #0.0\n", nm,
+ nameQReg128(dd), arr, nameQReg128(nn), arr);
+ return True;
+ }
+
if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
/* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
/* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
@@ -11517,18 +11958,172 @@
return True;
}
- if (bitU == 0 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
- /* -------- 0,01,10110: FCVTN 2s/4s_2d -------- */
- IRTemp rm = mk_get_IR_rounding_mode();
- IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
- IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
- putQRegLane(dd, 2 * bitQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
- putQRegLane(dd, 2 * bitQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
+ if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
+ /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
+ UInt nLanes = size == X00 ? 4 : 2;
+ IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
+ IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
+ IRTemp rm = mk_get_IR_rounding_mode();
+ IRTemp src[nLanes];
+ for (UInt i = 0; i < nLanes; i++) {
+ src[i] = newTemp(srcTy);
+ assign(src[i], getQRegLane(nn, i, srcTy));
+ }
+ for (UInt i = 0; i < nLanes; i++) {
+ putQRegLane(dd, nLanes * bitQ + i,
+ binop(opCvt, mkexpr(rm), mkexpr(src[i])));
+ }
if (bitQ == 0) {
putQRegLane(dd, 1, mkU64(0));
}
- DIP("fcvtn%s %s.%s, %s.2d\n", bitQ ? "2" : "",
- nameQReg128(dd), bitQ ? "4s" : "2s", nameQReg128(nn));
+ const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
+ const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
+ DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
+ nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
+ return True;
+ }
+
+ if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
+ /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
+ UInt nLanes = size == X00 ? 4 : 2;
+ IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
+ IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
+ IRTemp src[nLanes];
+ for (UInt i = 0; i < nLanes; i++) {
+ src[i] = newTemp(srcTy);
+ assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
+ }
+ for (UInt i = 0; i < nLanes; i++) {
+ putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
+ }
+ const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
+ const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
+ DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
+ nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
+ return True;
+ }
+
+ ix = 0;
+ if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
+ ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
+ // = 1 + bitU[0]:size[1]:opcode[0]
+ vassert(ix >= 1 && ix <= 8);
+ if (ix == 7) ix = 0;
+ }
+ if (ix > 0) {
+ /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
+ /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
+ /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
+ /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
+ /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
+ /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
+ /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
+ /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
+ /* rm plan:
+ FRINTN: tieeven -- !! FIXME KLUDGED !!
+ FRINTM: -inf
+ FRINTP: +inf
+ FRINTZ: zero
+ FRINTA: tieaway -- !! FIXME KLUDGED !!
+ FRINTX: per FPCR + "exact = TRUE"
+ FRINTI: per FPCR
+ */
+ Bool isD = (size & 1) == 1;
+ if (bitQ == 0 && isD) return False; // implied 1d case
+
+ IRTemp irrmRM = mk_get_IR_rounding_mode();
+
+ UChar ch = '?';
+ IRTemp irrm = newTemp(Ity_I32);
+ switch (ix) {
+ case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
+ case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
+ case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
+ case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
+ // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
+ case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
+ // I am unsure about the following, due to the "integral exact"
+ // description in the manual. What does it mean? (frintx, that is)
+ case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
+ case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
+ default: vassert(0);
+ }
+
+ IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
+ if (isD) {
+ for (UInt i = 0; i < 2; i++) {
+ putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
+ getQRegLane(nn, i, Ity_F64)));
+ }
+ } else {
+ UInt n = bitQ==1 ? 4 : 2;
+ for (U...
[truncated message content] |