|
From: Andreas A. <ar...@so...> - 2020-12-08 18:39:40
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=159f132289160ab1a5a5cf4da14fb57ecdb248ca commit 159f132289160ab1a5a5cf4da14fb57ecdb248ca Author: Andreas Arnez <ar...@li...> Date: Mon Dec 7 20:01:26 2020 +0100 Bug 404076 - s390x: Implement z14 vector instructions Implement the new instructions/features that were added to z/Architecture with the vector-enhancements facility 1. Also cover the instructions from the vector-packed-decimal facility that are defined outside the chapter "Vector Decimal Instructions", but not the ones from that chapter itself. For a detailed list of newly supported instructions see the updates to `docs/internals/s390-opcodes.csv'. Since the miscellaneous instruction extensions facility 2 was already addressed by Bug 404406, this completes the support necessary to run general programs built with `--march=z14' under Valgrind. The vector-packed-decimal facility is currently not exploited by the standard toolchain and libraries. Diff: --- NEWS | 1 + VEX/priv/guest_s390_defs.h | 43 +- VEX/priv/guest_s390_helpers.c | 166 +++-- VEX/priv/guest_s390_toIR.c | 1105 +++++++++++++++++++----------- VEX/priv/host_s390_defs.c | 21 +- VEX/priv/host_s390_defs.h | 5 +- VEX/priv/host_s390_isel.c | 61 +- VEX/priv/main_main.c | 1 + VEX/pub/libvex.h | 5 +- VEX/pub/libvex_emnote.h | 4 + auxprogs/s390-check-opcodes.pl | 4 +- coregrind/m_initimg/initimg-linux.c | 10 +- coregrind/m_machine.c | 1 + docs/internals/s390-opcodes.csv | 276 ++++---- include/vki/vki-s390x-linux.h | 1 + none/tests/s390x/vector.h | 13 +- none/tests/s390x/vector_float.c | 57 +- none/tests/s390x/vector_float.stdout.exp | 248 +++---- 18 files changed, 1237 insertions(+), 785 deletions(-) diff --git a/NEWS b/NEWS index cf403ab24c..7217273b28 100644 --- a/NEWS +++ b/NEWS @@ -77,6 +77,7 @@ n-i-bz helgrind: If hg_cli__realloc fails, return NULL. 
429864 s390x: C++ atomic test_and_set yields false-positive memcheck diagnostics 408663 Suppression file for musl libc +404076 s390x: z14 vector instructions not implemented Release 3.16.1 (?? June 2020) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h index 9f93cff193..9054290151 100644 --- a/VEX/priv/guest_s390_defs.h +++ b/VEX/priv/guest_s390_defs.h @@ -8,7 +8,7 @@ This file is part of Valgrind, a dynamic binary instrumentation framework. - Copyright IBM Corp. 2010-2017 + Copyright IBM Corp. 2010-2020 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -263,26 +263,27 @@ extern ULong last_execute_target; before S390_VEC_OP_LAST. */ typedef enum { S390_VEC_OP_INVALID = 0, - S390_VEC_OP_VPKS = 1, - S390_VEC_OP_VPKLS = 2, - S390_VEC_OP_VFAE = 3, - S390_VEC_OP_VFEE = 4, - S390_VEC_OP_VFENE = 5, - S390_VEC_OP_VISTR = 6, - S390_VEC_OP_VSTRC = 7, - S390_VEC_OP_VCEQ = 8, - S390_VEC_OP_VTM = 9, - S390_VEC_OP_VGFM = 10, - S390_VEC_OP_VGFMA = 11, - S390_VEC_OP_VMAH = 12, - S390_VEC_OP_VMALH = 13, - S390_VEC_OP_VCH = 14, - S390_VEC_OP_VCHL = 15, - S390_VEC_OP_VFCE = 16, - S390_VEC_OP_VFCH = 17, - S390_VEC_OP_VFCHE = 18, - S390_VEC_OP_VFTCI = 19, - S390_VEC_OP_LAST = 20 // supposed to be the last element in enum + S390_VEC_OP_VPKS, + S390_VEC_OP_VPKLS, + S390_VEC_OP_VFAE, + S390_VEC_OP_VFEE, + S390_VEC_OP_VFENE, + S390_VEC_OP_VISTR, + S390_VEC_OP_VSTRC, + S390_VEC_OP_VCEQ, + S390_VEC_OP_VTM, + S390_VEC_OP_VGFM, + S390_VEC_OP_VGFMA, + S390_VEC_OP_VMAH, + S390_VEC_OP_VMALH, + S390_VEC_OP_VCH, + S390_VEC_OP_VCHL, + S390_VEC_OP_VFTCI, + S390_VEC_OP_VFMIN, + S390_VEC_OP_VFMAX, + S390_VEC_OP_VBPERM, + S390_VEC_OP_VMSL, + S390_VEC_OP_LAST // supposed to be the last element in enum } s390x_vec_op_t; /* Arguments of s390x_dirtyhelper_vec_op(...) 
which are packed into one diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c index a470d9f8d3..b71b621ae6 100644 --- a/VEX/priv/guest_s390_helpers.c +++ b/VEX/priv/guest_s390_helpers.c @@ -8,7 +8,7 @@ This file is part of Valgrind, a dynamic binary instrumentation framework. - Copyright IBM Corp. 2010-2017 + Copyright IBM Corp. 2010-2020 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -314,20 +314,11 @@ ULong s390x_dirtyhelper_STCKE(ULong *addr) {return 3;} /*--- Dirty helper for Store Facility instruction ---*/ /*------------------------------------------------------------*/ #if defined(VGA_s390x) -static void -s390_set_facility_bit(ULong *addr, UInt bitno, UInt value) -{ - addr += bitno / 64; - bitno = bitno % 64; - - ULong mask = 1; - mask <<= (63 - bitno); - if (value == 1) { - *addr |= mask; // set - } else { - *addr &= ~mask; // clear - } +static ULong +s390_stfle_range(UInt lo, UInt hi) +{ + return ((1UL << (hi + 1 - lo)) - 1) << (63 - (hi % 64)); } ULong @@ -336,6 +327,77 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) ULong hoststfle[S390_NUM_FACILITY_DW], cc, num_dw, i; register ULong reg0 asm("0") = guest_state->guest_r0 & 0xF; /* r0[56:63] */ + /* Restrict to facilities that we know about and that we assume to be + compatible with Valgrind. Of course, in this way we may reject features + that Valgrind is not really involved in (and thus would be compatible + with), but querying for such features doesn't seem like a typical use + case. */ + ULong accepted_facility[S390_NUM_FACILITY_DW] = { + /* === 0 .. 
63 === */ + (s390_stfle_range(0, 16) + /* 17: message-security-assist, not supported */ + | s390_stfle_range(18, 19) + /* 20: HFP-multiply-and-add/subtract, not supported */ + | s390_stfle_range(21, 22) + /* 23: HFP-unnormalized-extension, not supported */ + | s390_stfle_range(24, 25) + /* 26: parsing-enhancement, not supported */ + | s390_stfle_range(27, 28) + /* 29: unassigned */ + | s390_stfle_range(30, 30) + /* 31: extract-CPU-time, not supported */ + | s390_stfle_range(32, 41) + /* 42-43: DFP, not fully supported */ + /* 44: PFPO, not fully supported */ + | s390_stfle_range(45, 47) + /* 48: DFP zoned-conversion, not supported */ + /* 49: includes PPA, not supported */ + /* 50: constrained transactional-execution, not supported */ + | s390_stfle_range(51, 55) + /* 56: unassigned */ + /* 57: MSA5, not supported */ + | s390_stfle_range(58, 60) + /* 61: miscellaneous-instruction 3, not supported */ + | s390_stfle_range(62, 63)), + + /* === 64 .. 127 === */ + (s390_stfle_range(64, 72) + /* 73: transactional-execution, not supported */ + | s390_stfle_range(74, 75) + /* 76: MSA3, not supported */ + /* 77: MSA4, not supported */ + | s390_stfle_range(78, 78) + /* 80: DFP packed-conversion, not supported */ + /* 81: PPA-in-order, not supported */ + | s390_stfle_range(82, 82) + /* 83-127: unassigned */ ), + + /* === 128 .. 
191 === */ + (s390_stfle_range(128, 131) + /* 132: unassigned */ + /* 133: guarded-storage, not supported */ + /* 134: vector packed decimal, not supported */ + | s390_stfle_range(135, 135) + /* 136: unassigned */ + /* 137: unassigned */ + | s390_stfle_range(138, 142) + /* 143: unassigned */ + | s390_stfle_range(144, 145) + /* 146: MSA8, not supported */ + | s390_stfle_range(147, 147) + /* 148: vector-enhancements 2, not supported */ + | s390_stfle_range(149, 149) + /* 150: unassigned */ + /* 151: DEFLATE-conversion, not supported */ + /* 153: unassigned */ + /* 154: unassigned */ + /* 155: MSA9, not supported */ + | s390_stfle_range(156, 156) + /* 157-167: unassigned */ + | s390_stfle_range(168, 168) + /* 169-191: unassigned */ ), + }; + /* We cannot store more than S390_NUM_FACILITY_DW (and it makes not much sense to do so anyhow) */ if (reg0 > S390_NUM_FACILITY_DW - 1) @@ -351,35 +413,9 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) /* Update guest register 0 with what STFLE set r0 to */ guest_state->guest_r0 = reg0; - /* Set default: VM facilities = host facilities */ + /* VM facilities = host facilities, filtered by acceptance */ for (i = 0; i < num_dw; ++i) - addr[i] = hoststfle[i]; - - /* Now adjust the VM facilities according to what the VM supports */ - s390_set_facility_bit(addr, S390_FAC_LDISP, 1); - s390_set_facility_bit(addr, S390_FAC_EIMM, 1); - s390_set_facility_bit(addr, S390_FAC_ETF2, 1); - s390_set_facility_bit(addr, S390_FAC_ETF3, 1); - s390_set_facility_bit(addr, S390_FAC_GIE, 1); - s390_set_facility_bit(addr, S390_FAC_EXEXT, 1); - s390_set_facility_bit(addr, S390_FAC_HIGHW, 1); - s390_set_facility_bit(addr, S390_FAC_LSC2, 1); - - s390_set_facility_bit(addr, S390_FAC_HFPMAS, 0); - s390_set_facility_bit(addr, S390_FAC_HFPUNX, 0); - s390_set_facility_bit(addr, S390_FAC_XCPUT, 0); - s390_set_facility_bit(addr, S390_FAC_MSA, 0); - s390_set_facility_bit(addr, S390_FAC_PENH, 0); - s390_set_facility_bit(addr, S390_FAC_DFP, 0); 
- s390_set_facility_bit(addr, S390_FAC_PFPO, 0); - s390_set_facility_bit(addr, S390_FAC_DFPZC, 0); - s390_set_facility_bit(addr, S390_FAC_MISC, 0); - s390_set_facility_bit(addr, S390_FAC_CTREXE, 0); - s390_set_facility_bit(addr, S390_FAC_TREXE, 0); - s390_set_facility_bit(addr, S390_FAC_MSA4, 0); - s390_set_facility_bit(addr, S390_FAC_VXE, 0); - s390_set_facility_bit(addr, S390_FAC_VXE2, 0); - s390_set_facility_bit(addr, S390_FAC_DFLT, 0); + addr[i] = hoststfle[i] & accepted_facility[i]; return cc; } @@ -2500,25 +2536,26 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, vassert(d->op > S390_VEC_OP_INVALID && d->op < S390_VEC_OP_LAST); static const UChar opcodes[][2] = { {0x00, 0x00}, /* invalid */ - {0xe7, 0x97}, /* VPKS */ - {0xe7, 0x95}, /* VPKLS */ - {0xe7, 0x82}, /* VFAE */ - {0xe7, 0x80}, /* VFEE */ - {0xe7, 0x81}, /* VFENE */ - {0xe7, 0x5c}, /* VISTR */ - {0xe7, 0x8a}, /* VSTRC */ - {0xe7, 0xf8}, /* VCEQ */ - {0xe7, 0xd8}, /* VTM */ - {0xe7, 0xb4}, /* VGFM */ - {0xe7, 0xbc}, /* VGFMA */ - {0xe7, 0xab}, /* VMAH */ - {0xe7, 0xa9}, /* VMALH */ - {0xe7, 0xfb}, /* VCH */ - {0xe7, 0xf9}, /* VCHL */ - {0xe7, 0xe8}, /* VFCE */ - {0xe7, 0xeb}, /* VFCH */ - {0xe7, 0xea}, /* VFCHE */ - {0xe7, 0x4a} /* VFTCI */ + [S390_VEC_OP_VPKS] = {0xe7, 0x97}, + [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, + [S390_VEC_OP_VFAE] = {0xe7, 0x82}, + [S390_VEC_OP_VFEE] = {0xe7, 0x80}, + [S390_VEC_OP_VFENE] = {0xe7, 0x81}, + [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, + [S390_VEC_OP_VSTRC] = {0xe7, 0x8a}, + [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, + [S390_VEC_OP_VTM] = {0xe7, 0xd8}, + [S390_VEC_OP_VGFM] = {0xe7, 0xb4}, + [S390_VEC_OP_VGFMA] = {0xe7, 0xbc}, + [S390_VEC_OP_VMAH] = {0xe7, 0xab}, + [S390_VEC_OP_VMALH] = {0xe7, 0xa9}, + [S390_VEC_OP_VCH] = {0xe7, 0xfb}, + [S390_VEC_OP_VCHL] = {0xe7, 0xf9}, + [S390_VEC_OP_VFTCI] = {0xe7, 0x4a}, + [S390_VEC_OP_VFMIN] = {0xe7, 0xee}, + [S390_VEC_OP_VFMAX] = {0xe7, 0xef}, + [S390_VEC_OP_VBPERM]= {0xe7, 0x85}, + [S390_VEC_OP_VMSL] = {0xe7, 0xb8}, }; union { @@ 
-2612,6 +2649,7 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, case S390_VEC_OP_VGFMA: case S390_VEC_OP_VMAH: case S390_VEC_OP_VMALH: + case S390_VEC_OP_VMSL: the_insn.VRRd.v1 = 1; the_insn.VRRd.v2 = 2; the_insn.VRRd.v3 = 3; @@ -2621,9 +2659,9 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, the_insn.VRRd.m6 = d->m5; break; - case S390_VEC_OP_VFCE: - case S390_VEC_OP_VFCH: - case S390_VEC_OP_VFCHE: + case S390_VEC_OP_VFMIN: + case S390_VEC_OP_VFMAX: + case S390_VEC_OP_VBPERM: the_insn.VRRc.v1 = 1; the_insn.VRRc.v2 = 2; the_insn.VRRc.v3 = 3; diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index 58f532d069..3393770070 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -8,7 +8,7 @@ This file is part of Valgrind, a dynamic binary instrumentation framework. - Copyright IBM Corp. 2010-2017 + Copyright IBM Corp. 2010-2020 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -248,6 +248,13 @@ typedef enum { #define VRS_d2(insn) (((insn) >> 32) & 0xfff) #define VRS_m4(insn) (((insn) >> 28) & 0xf) #define VRS_rxb(insn) (((insn) >> 24) & 0xf) +#define VRSd_v1(insn) (((insn) >> 28) & 0xf) +#define VRSd_r3(insn) (((insn) >> 48) & 0xf) +#define VSI_i3(insn) (((insn) >> 48) & 0xff) +#define VSI_b2(insn) (((insn) >> 44) & 0xf) +#define VSI_d2(insn) (((insn) >> 32) & 0xfff) +#define VSI_v1(insn) (((insn) >> 28) & 0xf) +#define VSI_rxb(insn) (((insn) >> 24) & 0xf) /*------------------------------------------------------------*/ @@ -1937,6 +1944,26 @@ s390_vr_get_type(const UChar m) return results[m]; } +/* Determine IRType from instruction's floating-point format field */ +static IRType +s390_vr_get_ftype(const UChar m) +{ + static const IRType results[] = {Ity_F32, Ity_F64, Ity_F128}; + if (m >= 2 && m <= 4) + return results[m - 2]; + return Ity_INVALID; +} + +/* Determine number of elements from instruction's floating-point format + 
field */ +static UChar +s390_vr_get_n_elem(const UChar m) +{ + if (m >= 2 && m <= 4) + return 1 << (4 - m); + return 0; +} + /* Determine if Condition Code Set (CS) flag is set in m field */ #define s390_vr_is_cs_set(m) (((m) & 0x1) != 0) @@ -2191,12 +2218,15 @@ s390_vr_offset_by_index(UInt archreg,IRType type, UChar index) goto invalidIndex; } return vr_offset(archreg) + sizeof(ULong) * index; + case Ity_V128: + case Ity_F128: if(index == 0) { return vr_qw_offset(archreg); } else { goto invalidIndex; } + default: vpanic("s390_vr_offset_by_index: unknown type"); } @@ -2214,7 +2244,14 @@ put_vr(UInt archreg, IRType type, UChar index, IRExpr *expr) UInt offset = s390_vr_offset_by_index(archreg, type, index); vassert(typeOfIRExpr(irsb->tyenv, expr) == type); - stmt(IRStmt_Put(offset, expr)); + if (type == Ity_F128) { + IRTemp val = newTemp(Ity_F128); + assign(val, expr); + stmt(IRStmt_Put(offset, unop(Iop_F128HItoF64, mkexpr(val)))); + stmt(IRStmt_Put(offset + 8, unop(Iop_F128LOtoF64, mkexpr(val)))); + } else { + stmt(IRStmt_Put(offset, expr)); + } } /* Read type sized part specified by index of a vr register. */ @@ -2222,6 +2259,11 @@ static IRExpr * get_vr(UInt archreg, IRType type, UChar index) { UInt offset = s390_vr_offset_by_index(archreg, type, index); + if (type == Ity_F128) { + return binop(Iop_F64HLtoF128, + IRExpr_Get(offset, Ity_F64), + IRExpr_Get(offset + 8, Ity_F64)); + } return IRExpr_Get(offset, type); } @@ -2297,11 +2339,11 @@ s390_getCountToBlockBoundary(IRTemp op2addr, UChar m) return mkexpr(output); } -/* Load bytes into v1. - maxIndex specifies max index to load and must be Ity_I32. - If maxIndex >= 15, all 16 bytes are loaded. - All bytes after maxIndex are zeroed. */ -static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex) +/* Starting from addr, load at most maxIndex + 1 bytes into v1. Fill the + leftmost or rightmost bytes of v1, depending on whether `rightmost' is set. 
+ If maxIndex >= 15, load all 16 bytes; otherwise clear the remaining bytes. */ +static void +s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex, Bool rightmost) { IRTemp maxIdx = newTemp(Ity_I32); IRTemp cappedMax = newTemp(Ity_I64); @@ -2314,8 +2356,8 @@ static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex) crossed if and only if the real insn would have crossed it as well. Thus, if the bytes to load are fully contained in an aligned 16-byte chunk, load the whole 16-byte aligned chunk, and otherwise load 16 bytes - from the unaligned address. Then shift the loaded data left-aligned - into the target vector register. */ + from the unaligned address. Then shift the loaded data left- or + right-aligned into the target vector register. */ assign(maxIdx, maxIndex); assign(cappedMax, mkite(binop(Iop_CmpLT32U, mkexpr(maxIdx), mkU32(15)), @@ -2328,20 +2370,60 @@ static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex) assign(back, mkite(binop(Iop_CmpLE64U, mkexpr(offset), mkexpr(zeroed)), mkexpr(offset), mkU64(0))); - /* How much to shift the loaded 16-byte vector to the right, and then to - the left. Since both 'zeroed' and 'back' range from 0 to 15, the shift - amounts range from 0 to 120. */ - IRExpr *shrAmount = binop(Iop_Shl64, - binop(Iop_Sub64, mkexpr(zeroed), mkexpr(back)), - mkU8(3)); - IRExpr *shlAmount = binop(Iop_Shl64, mkexpr(zeroed), mkU8(3)); + IRExpr* chunk = load(Ity_V128, binop(Iop_Sub64, mkexpr(addr), mkexpr(back))); + + /* Shift the loaded 16-byte vector to the right, then to the left, or vice + versa, where each shift amount ranges from 0 to 120. 
*/ + IRExpr* shift1; + IRExpr* shift2 = unop(Iop_64to8, binop(Iop_Shl64, mkexpr(zeroed), mkU8(3))); + + if (rightmost) { + shift1 = unop(Iop_64to8, binop(Iop_Shl64, mkexpr(back), mkU8(3))); + put_vr_qw(v1, binop(Iop_ShrV128, + binop(Iop_ShlV128, chunk, shift1), + shift2)); + } else { + shift1 = unop(Iop_64to8, + binop(Iop_Shl64, + binop(Iop_Sub64, mkexpr(zeroed), mkexpr(back)), + mkU8(3))); + put_vr_qw(v1, binop(Iop_ShlV128, + binop(Iop_ShrV128, chunk, shift1), + shift2)); + } +} + +/* Store at most maxIndex + 1 bytes from v1 to addr. Store the leftmost or + rightmost bytes of v1, depending on whether `rightmost' is set. If maxIndex + >= 15, store all 16 bytes. */ +static void +s390_vr_storeWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex, Bool rightmost) +{ + IRTemp maxIdx = newTemp(Ity_I32); + IRTemp cappedMax = newTemp(Ity_I64); + IRTemp counter = newTemp(Ity_I64); + IRExpr* offset; + + assign(maxIdx, maxIndex); + assign(cappedMax, mkite(binop(Iop_CmpLT32U, mkexpr(maxIdx), mkU32(15)), + unop(Iop_32Uto64, mkexpr(maxIdx)), mkU64(15))); + + assign(counter, get_counter_dw0()); + + if (rightmost) + offset = binop(Iop_Add64, + binop(Iop_Sub64, mkU64(15), mkexpr(cappedMax)), + mkexpr(counter)); + else + offset = mkexpr(counter); + + store(binop(Iop_Add64, mkexpr(addr), mkexpr(counter)), + binop(Iop_GetElem8x16, get_vr_qw(v1), unop(Iop_64to8, offset))); - put_vr_qw(v1, binop(Iop_ShlV128, - binop(Iop_ShrV128, - load(Ity_V128, - binop(Iop_Sub64, mkexpr(addr), mkexpr(back))), - unop(Iop_64to8, shrAmount)), - unop(Iop_64to8, shlAmount))); + /* Check for end of field */ + put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); + iterate_if(binop(Iop_CmpNE64, mkexpr(counter), mkexpr(cappedMax))); + put_counter_dw0(mkU64(0)); } /* Bitwise vCond ? 
v1 : v2 @@ -3752,6 +3834,28 @@ s390_format_VRS_RRDVM(const HChar *(*irgen)(UChar r1, IRTemp op2addr, UChar v3, s390_disasm(ENC5(MNM, GPR, UDXB, VR, UINT), mnm, r1, d2, 0, b2, v3, m4); } +static void +s390_format_VRS_RRDV(const HChar *(*irgen)(UChar v1, UChar r3, IRTemp op2addr), + UChar v1, UChar r3, UChar b2, UShort d2, UChar rxb) +{ + const HChar *mnm; + IRTemp op2addr = newTemp(Ity_I64); + + if (! s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) : + mkU64(0))); + + v1 = s390_vr_getVRindex(v1, 4, rxb); + mnm = irgen(v1, r3, op2addr); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC4(MNM, VR, GPR, UDXB), mnm, v1, r3, d2, 0, b2); +} + static void s390_format_VRS_VRDVM(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar v3, @@ -4084,6 +4188,29 @@ s390_format_VRRa_VVVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, mnm, v1, v2, v3, m4, m5, m6); } +static void +s390_format_VSI_URDV(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar i3), + UChar v1, UChar b2, UChar d2, UChar i3, UChar rxb) +{ + const HChar *mnm; + IRTemp op2addr = newTemp(Ity_I64); + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 4, rxb); + + assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? 
get_gpr_dw0(b2) : + mkU64(0))); + + mnm = irgen(v1, op2addr, i3); + + if (vex_traceflags & VEX_TRACE_FE) + s390_disasm(ENC4(MNM, VR, UDXB, UINT), mnm, v1, d2, 0, b2, i3); +} + /*------------------------------------------------------------*/ /*--- Build IR for opcodes ---*/ /*------------------------------------------------------------*/ @@ -16183,7 +16310,9 @@ s390_irgen_VGM(UChar v1, UShort i2, UChar m3) static const HChar * s390_irgen_VLLEZ(UChar v1, IRTemp op2addr, UChar m3) { - IRType type = s390_vr_get_type(m3); + s390_insn_assert("vllez", m3 <= 3 || m3 == 6); + + IRType type = s390_vr_get_type(m3 & 3); IRExpr* op2 = load(type, mkexpr(op2addr)); IRExpr* op2as64bit; switch (type) { @@ -16203,7 +16332,13 @@ s390_irgen_VLLEZ(UChar v1, IRTemp op2addr, UChar m3) vpanic("s390_irgen_VLLEZ: unknown type"); } - put_vr_dw0(v1, op2as64bit); + if (m3 == 6) { + /* left-aligned */ + put_vr_dw0(v1, binop(Iop_Shl64, op2as64bit, mkU8(32))); + } else { + /* right-aligned */ + put_vr_dw0(v1, op2as64bit); + } put_vr_dw1(v1, mkU64(0)); return "vllez"; } @@ -16612,7 +16747,7 @@ s390_irgen_VLBB(UChar v1, IRTemp addr, UChar m3) s390_getCountToBlockBoundary(addr, m3), mkU32(1)); - s390_vr_loadWithLength(v1, addr, maxIndex); + s390_vr_loadWithLength(v1, addr, maxIndex, False); return "vlbb"; } @@ -16620,41 +16755,50 @@ s390_irgen_VLBB(UChar v1, IRTemp addr, UChar m3) static const HChar * s390_irgen_VLL(UChar v1, IRTemp addr, UChar r3) { - s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3)); + s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3), False); return "vll"; } static const HChar * -s390_irgen_VSTL(UChar v1, IRTemp addr, UChar r3) +s390_irgen_VLRL(UChar v1, IRTemp addr, UChar i3) { - IRTemp counter = newTemp(Ity_I64); - IRTemp maxIndexToStore = newTemp(Ity_I64); - IRTemp gpr3 = newTemp(Ity_I64); - - assign(gpr3, unop(Iop_32Uto64, get_gpr_w1(r3))); - assign(maxIndexToStore, mkite(binop(Iop_CmpLE64U, - mkexpr(gpr3), - mkU64(16) - ), - mkexpr(gpr3), - mkU64(16) - ) - ); + 
s390_insn_assert("vlrl", (i3 & 0xf0) == 0); + s390_vr_loadWithLength(v1, addr, mkU32((UInt) i3), True); - assign(counter, get_counter_dw0()); + return "vlrl"; +} - store(binop(Iop_Add64, mkexpr(addr), mkexpr(counter)), - binop(Iop_GetElem8x16, get_vr_qw(v1), unop(Iop_64to8, mkexpr(counter)))); +static const HChar * +s390_irgen_VLRLR(UChar v1, UChar r3, IRTemp addr) +{ + s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3), True); - /* Check for end of field */ - put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); - iterate_if(binop(Iop_CmpNE64, mkexpr(counter), mkexpr(maxIndexToStore))); - put_counter_dw0(mkU64(0)); + return "vlrlr"; +} +static const HChar * +s390_irgen_VSTL(UChar v1, IRTemp addr, UChar r3) +{ + s390_vr_storeWithLength(v1, addr, get_gpr_w1(r3), False); return "vstl"; } +static const HChar * +s390_irgen_VSTRL(UChar v1, IRTemp addr, UChar i3) +{ + s390_insn_assert("vstrl", (i3 & 0xf0) == 0); + s390_vr_storeWithLength(v1, addr, mkU32((UInt) i3), True); + return "vstrl"; +} + +static const HChar * +s390_irgen_VSTRLR(UChar v1, UChar r3, IRTemp addr) +{ + s390_vr_storeWithLength(v1, addr, get_gpr_w1(r3), True); + return "vstrlr"; +} + static const HChar * s390_irgen_VX(UChar v1, UChar v2, UChar v3) { @@ -16679,6 +16823,24 @@ s390_irgen_VO(UChar v1, UChar v2, UChar v3) return "vo"; } +static const HChar * +s390_irgen_VOC(UChar v1, UChar v2, UChar v3) +{ + put_vr_qw(v1, binop(Iop_OrV128, get_vr_qw(v2), + unop(Iop_NotV128, get_vr_qw(v3)))); + + return "voc"; +} + +static const HChar * +s390_irgen_VNN(UChar v1, UChar v2, UChar v3) +{ + put_vr_qw(v1, unop(Iop_NotV128, + binop(Iop_AndV128, get_vr_qw(v2), get_vr_qw(v3)))); + + return "vnn"; +} + static const HChar * s390_irgen_VNO(UChar v1, UChar v2, UChar v3) { @@ -16688,6 +16850,15 @@ s390_irgen_VNO(UChar v1, UChar v2, UChar v3) return "vno"; } +static const HChar * +s390_irgen_VNX(UChar v1, UChar v2, UChar v3) +{ + put_vr_qw(v1, unop(Iop_NotV128, + binop(Iop_XorV128, get_vr_qw(v2), get_vr_qw(v3)))); + + 
return "vnx"; +} + static const HChar * s390_irgen_LZRF(UChar r1, IRTemp op2addr) { @@ -17496,9 +17667,19 @@ s390_irgen_VCTZ(UChar v1, UChar v2, UChar m3) static const HChar * s390_irgen_VPOPCT(UChar v1, UChar v2, UChar m3) { - vassert(m3 == 0); + s390_insn_assert("vpopct", m3 <= 3); + + IRExpr* cnt = unop(Iop_Cnt8x16, get_vr_qw(v2)); - put_vr_qw(v1, unop(Iop_Cnt8x16, get_vr_qw(v2))); + if (m3 >= 1) { + cnt = unop(Iop_PwAddL8Ux16, cnt); + if (m3 >= 2) { + cnt = unop(Iop_PwAddL16Ux8, cnt); + if (m3 == 3) + cnt = unop(Iop_PwAddL32Ux4, cnt); + } + } + put_vr_qw(v1, cnt); return "vpopct"; } @@ -18332,12 +18513,53 @@ s390_irgen_VMALH(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) return "vmalh"; } +static const HChar * +s390_irgen_VMSL(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + s390_insn_assert("vmsl", m5 == 3 && (m6 & 3) == 0); + + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VMSL; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.v4 = v4; + details.m4 = m5; + details.m5 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + d->nFxState = 4; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Read; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[3].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + + return "vmsl"; +} + static void 
-s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, +s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, Bool rounding, UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) { Bool isSingleElementOp = s390_vr_is_single_element_control_set(m4); - UChar maxIndex = isSingleElementOp ? 0 : 1; /* For Iop_F32toF64 we do this: f32[0] -> f64[0] @@ -18350,14 +18572,21 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, The magic below with scaling factors is used to achieve the logic described above. */ - const UChar sourceIndexScaleFactor = (op == Iop_F32toF64) ? 2 : 1; - const UChar destinationIndexScaleFactor = (op == Iop_F64toF32) ? 2 : 1; - - const Bool isUnary = (op == Iop_F32toF64); - for (UChar i = 0; i <= maxIndex; i++) { + Int size_diff = sizeofIRType(toType) - sizeofIRType(fromType); + const UChar sourceIndexScaleFactor = size_diff > 0 ? 2 : 1; + const UChar destinationIndexScaleFactor = size_diff < 0 ? 2 : 1; + UChar n_elem = (isSingleElementOp ? 1 : + 16 / (size_diff > 0 ? 
+ sizeofIRType(toType) : sizeofIRType(fromType))); + + for (UChar i = 0; i < n_elem; i++) { IRExpr* argument = get_vr(v2, fromType, i * sourceIndexScaleFactor); IRExpr* result; - if (!isUnary) { + if (rounding) { + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } result = binop(op, mkexpr(encode_bfp_rounding_mode(m5)), argument); @@ -18366,10 +18595,6 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, } put_vr(v1, toType, i * destinationIndexScaleFactor, result); } - - if (isSingleElementOp) { - put_vr_dw1(v1, mkU64(0)); - } } static const HChar * @@ -18377,12 +18602,8 @@ s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) { s390_insn_assert("vcdg", m3 == 3); - if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { - emulation_warning(EmWarn_S390X_fpext_rounding); - m5 = S390_BFP_ROUND_PER_FPC; - } - - s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); + s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, True, + v1, v2, m3, m4, m5); return "vcdg"; } @@ -18392,12 +18613,8 @@ s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) { s390_insn_assert("vcdlg", m3 == 3); - if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { - emulation_warning(EmWarn_S390X_fpext_rounding); - m5 = S390_BFP_ROUND_PER_FPC; - } - - s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); + s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, True, + v1, v2, m3, m4, m5); return "vcdlg"; } @@ -18407,12 +18624,8 @@ s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) { s390_insn_assert("vcgd", m3 == 3); - if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { - emulation_warning(EmWarn_S390X_fpext_rounding); - m5 = S390_BFP_ROUND_PER_FPC; - } - - s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); + s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, 
Ity_I64, True, + v1, v2, m3, m4, m5); return "vcgd"; } @@ -18422,12 +18635,8 @@ s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) { s390_insn_assert("vclgd", m3 == 3); - if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { - emulation_warning(EmWarn_S390X_fpext_rounding); - m5 = S390_BFP_ROUND_PER_FPC; - } - - s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); + s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, True, + v1, v2, m3, m4, m5); return "vclgd"; } @@ -18435,246 +18644,262 @@ s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) static const HChar * s390_irgen_VFI(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) { - s390_insn_assert("vfi", m3 == 3); + s390_insn_assert("vfi", + (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); - if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { - emulation_warning(EmWarn_S390X_fpext_rounding); - m5 = S390_BFP_ROUND_PER_FPC; + switch (m3) { + case 2: s390_vector_fp_convert(Iop_RoundF32toInt, Ity_F32, Ity_F32, True, + v1, v2, m3, m4, m5); break; + case 3: s390_vector_fp_convert(Iop_RoundF64toInt, Ity_F64, Ity_F64, True, + v1, v2, m3, m4, m5); break; + case 4: s390_vector_fp_convert(Iop_RoundF128toInt, Ity_F128, Ity_F128, True, + v1, v2, m3, m4, m5); break; } - s390_vector_fp_convert(Iop_RoundF64toInt, Ity_F64, Ity_F64, - v1, v2, m3, m4, m5); - - return "vcgld"; + return "vfi"; } static const HChar * -s390_irgen_VLDE(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +s390_irgen_VFLL(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) { - s390_insn_assert("vlde", m3 == 2); + s390_insn_assert("vfll", m3 == 2 || (s390_host_has_vxe && m3 == 3)); - s390_vector_fp_convert(Iop_F32toF64, Ity_F32, Ity_F64, v1, v2, m3, m4, m5); + if (m3 == 2) + s390_vector_fp_convert(Iop_F32toF64, Ity_F32, Ity_F64, False, + v1, v2, m3, m4, m5); + else + s390_vector_fp_convert(Iop_F64toF128, Ity_F64, Ity_F128, False, + v1, v2, m3, m4, m5); - return "vlde"; + return 
"vfll"; } static const HChar * -s390_irgen_VLED(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +s390_irgen_VFLR(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) { - s390_insn_assert("vled", m3 == 3); - - if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { - m5 = S390_BFP_ROUND_PER_FPC; - } + s390_insn_assert("vflr", m3 == 3 || (s390_host_has_vxe && m3 == 2)); - s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, v1, v2, m3, m4, m5); + if (m3 == 3) + s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, True, + v1, v2, m3, m4, m5); + else + s390_vector_fp_convert(Iop_F128toF64, Ity_F128, Ity_F64, True, + v1, v2, m3, m4, m5); - return "vled"; + return "vflr"; } static const HChar * s390_irgen_VFPSO(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) { - s390_insn_assert("vfpso", m3 == 3); - - IRExpr* result; - switch (m5) { - case 0: { - /* Invert sign */ - if (!s390_vr_is_single_element_control_set(m4)) { - result = unop(Iop_Neg64Fx2, get_vr_qw(v2)); - } - else { - result = binop(Iop_64HLtoV128, - unop(Iop_ReinterpF64asI64, - unop(Iop_NegF64, get_vr(v2, Ity_F64, 0))), - mkU64(0)); - } - break; - } + s390_insn_assert("vfpso", m5 <= 2 && + (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); - case 1: { - /* Set sign to negative */ - IRExpr* highHalf = mkU64(0x8000000000000000ULL); - if (!s390_vr_is_single_element_control_set(m4)) { - IRExpr* lowHalf = highHalf; - IRExpr* mask = binop(Iop_64HLtoV128, highHalf, lowHalf); - result = binop(Iop_OrV128, get_vr_qw(v2), mask); - } - else { - result = binop(Iop_64HLtoV128, - binop(Iop_Or64, get_vr_dw0(v2), highHalf), - mkU64(0ULL)); - } + Bool single = s390_vr_is_single_element_control_set(m4) || m3 == 4; + IRType type = single ? s390_vr_get_ftype(m3) : Ity_V128; + int idx = 2 * (m3 - 2) + (single ? 
0 : 1); - break; - } - - case 2: { - /* Set sign to positive */ - if (!s390_vr_is_single_element_control_set(m4)) { - result = unop(Iop_Abs64Fx2, get_vr_qw(v2)); - } - else { - result = binop(Iop_64HLtoV128, - unop(Iop_ReinterpF64asI64, - unop(Iop_AbsF64, get_vr(v2, Ity_F64, 0))), - mkU64(0)); - } - - break; - } - - default: - vpanic("s390_irgen_VFPSO: Invalid m5 value"); - } + static const IROp negate_ops[] = { + Iop_NegF32, Iop_Neg32Fx4, + Iop_NegF64, Iop_Neg64Fx2, + Iop_NegF128 + }; + static const IROp abs_ops[] = { + Iop_AbsF32, Iop_Abs32Fx4, + Iop_AbsF64, Iop_Abs64Fx2, + Iop_AbsF128 + }; - put_vr_qw(v1, result); - if (s390_vr_is_single_element_control_set(m4)) { - put_vr_dw1(v1, mkU64(0ULL)); + if (m5 == 1) { + /* Set sign to negative */ + put_vr(v1, type, 0, + unop(negate_ops[idx], + unop(abs_ops[idx], get_vr(v2, type, 0)))); + } else { + /* m5 == 0: invert sign; m5 == 2: set sign to positive */ + const IROp *ops = m5 == 2 ? abs_ops : negate_ops; + put_vr(v1, type, 0, unop(ops[idx], get_vr(v2, type, 0))); } return "vfpso"; } -static void s390x_vec_fp_binary_op(IROp generalOp, IROp singleElementOp, - UChar v1, UChar v2, UChar v3, UChar m4, - UChar m5) +static const HChar * +s390x_vec_fp_binary_op(const HChar* mnm, const IROp ops[], + UChar v1, UChar v2, UChar v3, + UChar m4, UChar m5) { - IRExpr* result; - if (!s390_vr_is_single_element_control_set(m5)) { - result = triop(generalOp, get_bfp_rounding_mode_from_fpc(), - get_vr_qw(v2), get_vr_qw(v3)); + s390_insn_assert(mnm, (m5 & 7) == 0 && + (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); + + int idx = 2 * (m4 - 2); + + if (m4 == 4 || s390_vr_is_single_element_control_set(m5)) { + IRType type = s390_vr_get_ftype(m4); + put_vr(v1, type, 0, + triop(ops[idx], get_bfp_rounding_mode_from_fpc(), + get_vr(v2, type, 0), get_vr(v3, type, 0))); } else { - IRExpr* highHalf = triop(singleElementOp, - get_bfp_rounding_mode_from_fpc(), - get_vr(v2, Ity_F64, 0), - get_vr(v3, Ity_F64, 0)); - result = 
binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), - mkU64(0ULL)); + put_vr_qw(v1, triop(ops[idx + 1], get_bfp_rounding_mode_from_fpc(), + get_vr_qw(v2), get_vr_qw(v3))); } - put_vr_qw(v1, result); + return mnm; } -static void s390x_vec_fp_unary_op(IROp generalOp, IROp singleElementOp, - UChar v1, UChar v2, UChar m3, UChar m4) +static const HChar * +s390x_vec_fp_unary_op(const HChar* mnm, const IROp ops[], + UChar v1, UChar v2, UChar m3, UChar m4) { - IRExpr* result; - if (!s390_vr_is_single_element_control_set(m4)) { - result = binop(generalOp, get_bfp_rounding_mode_from_fpc(), - get_vr_qw(v2)); + s390_insn_assert(mnm, (m4 & 7) == 0 && + (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); + + int idx = 2 * (m3 - 2); + + if (m3 == 4 || s390_vr_is_single_element_control_set(m4)) { + IRType type = s390_vr_get_ftype(m3); + put_vr(v1, type, 0, + binop(ops[idx], get_bfp_rounding_mode_from_fpc(), + get_vr(v2, type, 0))); } else { - IRExpr* highHalf = binop(singleElementOp, - get_bfp_rounding_mode_from_fpc(), - get_vr(v2, Ity_F64, 0)); - result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), - mkU64(0ULL)); + put_vr_qw(v1, binop(ops[idx + 1], get_bfp_rounding_mode_from_fpc(), + get_vr_qw(v2))); } - put_vr_qw(v1, result); + return mnm; } -static void -s390_vector_fp_mulAddOrSub(IROp singleElementOp, - UChar v1, UChar v2, UChar v3, UChar v4, - UChar m5, UChar m6) +static const HChar * +s390_vector_fp_mulAddOrSub(UChar v1, UChar v2, UChar v3, UChar v4, + UChar m5, UChar m6, + const HChar* mnm, const IROp single_ops[], + Bool negate) { - Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + s390_insn_assert(mnm, m6 == 3 || (s390_host_has_vxe && m6 >= 2 && m6 <= 4)); + + static const IROp negate_ops[] = { Iop_NegF32, Iop_NegF64, Iop_NegF128 }; + IRType type = s390_vr_get_ftype(m6); + Bool single = s390_vr_is_single_element_control_set(m5) || m6 == 4; + UChar n_elem = single ? 
1 : s390_vr_get_n_elem(m6); IRTemp irrm_temp = newTemp(Ity_I32); assign(irrm_temp, get_bfp_rounding_mode_from_fpc()); IRExpr* irrm = mkexpr(irrm_temp); - IRExpr* result; - IRExpr* highHalf = qop(singleElementOp, - irrm, - get_vr(v2, Ity_F64, 0), - get_vr(v3, Ity_F64, 0), - get_vr(v4, Ity_F64, 0)); - - if (isSingleElementOp) { - result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), - mkU64(0ULL)); - } else { - IRExpr* lowHalf = qop(singleElementOp, - irrm, - get_vr(v2, Ity_F64, 1), - get_vr(v3, Ity_F64, 1), - get_vr(v4, Ity_F64, 1)); - result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), - unop(Iop_ReinterpF64asI64, lowHalf)); - } - put_vr_qw(v1, result); + for (UChar idx = 0; idx < n_elem; idx++) { + IRExpr* result = qop(single_ops[m6 - 2], + irrm, + get_vr(v2, type, idx), + get_vr(v3, type, idx), + get_vr(v4, type, idx)); + put_vr(v1, type, idx, negate ? unop(negate_ops[m6 - 2], result) : result); + } + return mnm; } static const HChar * s390_irgen_VFA(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) { - s390_insn_assert("vfa", m4 == 3); - s390x_vec_fp_binary_op(Iop_Add64Fx2, Iop_AddF64, v1, v2, v3, m4, m5); - return "vfa"; + static const IROp vfa_ops[] = { + Iop_AddF32, Iop_Add32Fx4, + Iop_AddF64, Iop_Add64Fx2, + Iop_AddF128, + }; + return s390x_vec_fp_binary_op("vfa", vfa_ops, v1, v2, v3, m4, m5); } static const HChar * s390_irgen_VFS(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) { - s390_insn_assert("vfs", m4 == 3); - s390x_vec_fp_binary_op(Iop_Sub64Fx2, Iop_SubF64, v1, v2, v3, m4, m5); - return "vfs"; + static const IROp vfs_ops[] = { + Iop_SubF32, Iop_Sub32Fx4, + Iop_SubF64, Iop_Sub64Fx2, + Iop_SubF128, + }; + return s390x_vec_fp_binary_op("vfs", vfs_ops, v1, v2, v3, m4, m5); } static const HChar * s390_irgen_VFM(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) { - s390_insn_assert("vfm", m4 == 3); - s390x_vec_fp_binary_op(Iop_Mul64Fx2, Iop_MulF64, v1, v2, v3, m4, m5); - return "vfm"; + static const IROp vfm_ops[] = { + 
Iop_MulF32, Iop_Mul32Fx4, + Iop_MulF64, Iop_Mul64Fx2, + Iop_MulF128, + }; + return s390x_vec_fp_binary_op("vfm", vfm_ops, v1, v2, v3, m4, m5); } static const HChar * s390_irgen_VFD(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) { - s390_insn_assert("vfd", m4 == 3); - s390x_vec_fp_binary_op(Iop_Div64Fx2, Iop_DivF64, v1, v2, v3, m4, m5); - return "vfd"; + static const IROp vfd_ops[] = { + Iop_DivF32, Iop_Div32Fx4, + Iop_DivF64, Iop_Div64Fx2, + Iop_DivF128, + }; + return s390x_vec_fp_binary_op("vfd", vfd_ops, v1, v2, v3, m4, m5); } static const HChar * s390_irgen_VFSQ(UChar v1, UChar v2, UChar m3, UChar m4) { - s390_insn_assert("vfsq", m3 == 3); - s390x_vec_fp_unary_op(Iop_Sqrt64Fx2, Iop_SqrtF64, v1, v2, m3, m4); - - return "vfsq"; + static const IROp vfsq_ops[] = { + Iop_SqrtF32, Iop_Sqrt32Fx4, + Iop_SqrtF64, Iop_Sqrt64Fx2, + Iop_SqrtF128 + }; + return s390x_vec_fp_unary_op("vfsq", vfsq_ops, v1, v2, m3, m4); } +static const IROp FMA_single_ops[] = { + Iop_MAddF32, Iop_MAddF64, Iop_MAddF128 +}; + static const HChar * s390_irgen_VFMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) { - s390_insn_assert("vfma", m6 == 3); - s390_vector_fp_mulAddOrSub(Iop_MAddF64, v1, v2, v3, v4, m5, m6); - return "vfma"; + return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, + "vfma", FMA_single_ops, False); } +static const HChar * +s390_irgen_VFNMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, + "vfnma", FMA_single_ops, True); +} + +static const IROp FMS_single_ops[] = { + Iop_MSubF32, Iop_MSubF64, Iop_MSubF128 +}; + static const HChar * s390_irgen_VFMS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) { - s390_insn_assert("vfms", m6 == 3); - s390_vector_fp_mulAddOrSub(Iop_MSubF64, v1, v2, v3, v4, m5, m6); - return "vfms"; + return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, + "vfms", FMS_single_ops, False); +} + +static const HChar * +s390_irgen_VFNMS(UChar v1, UChar v2, 
UChar v3, UChar v4, UChar m5, UChar m6) +{ + return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, + "vfnms", FMS_single_ops, True); } static const HChar * s390_irgen_WFC(UChar v1, UChar v2, UChar m3, UChar m4) { - s390_insn_assert("wfc", m3 == 3); - s390_insn_assert("wfc", m4 == 0); + s390_insn_assert("wfc", m4 == 0 && + (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); + + static const IROp ops[] = { Iop_CmpF32, Iop_CmpF64, Iop_CmpF128 }; + IRType type = s390_vr_get_ftype(m3); IRTemp cc_vex = newTemp(Ity_I32); - assign(cc_vex, binop(Iop_CmpF64, - get_vr(v1, Ity_F64, 0), get_vr(v2, Ity_F64, 0))); + assign(cc_vex, binop(ops[m3 - 2], get_vr(v1, type, 0), get_vr(v2, type, 0))); IRTemp cc_s390 = newTemp(Ity_I32); assign(cc_s390, convert_vex_bfpcc_to_s390(cc_vex)); @@ -18692,213 +18917,253 @@ s390_irgen_WFK(UChar v1, UChar v2, UChar m3, UChar m4) } static const HChar * -s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6, + const HChar *mnem, IRCmpFResult cmp, Bool equal_ok, + IROp cmp32, IROp cmp64) { - s390_insn_assert("vfce", m4 == 3); + s390_insn_assert(mnem, (m5 & 3) == 0 && (m6 & 14) == 0 && + (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); - Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); - if (!s390_vr_is_cs_set(m6)) { - if (!isSingleElementOp) { - put_vr_qw(v1, binop(Iop_CmpEQ64Fx2, get_vr_qw(v2), get_vr_qw(v3))); + Bool single = s390_vr_is_single_element_control_set(m5) || m4 == 4; + + if (single) { + static const IROp ops[] = { Iop_CmpF32, Iop_CmpF64, Iop_CmpF128 }; + IRType type = s390_vr_get_ftype(m4); + IRTemp result = newTemp(Ity_I32); + IRTemp cond = newTemp(Ity_I1); + + assign(result, binop(ops[m4 - 2], + get_vr(v2, type, 0), get_vr(v3, type, 0))); + if (equal_ok) { + assign(cond, + binop(Iop_Or1, + binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp)), + binop(Iop_CmpEQ32, mkexpr(result), mkU32(Ircr_EQ)))); } else { - 
IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), - get_vr(v3, Ity_F64, 0)); - IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, - mkU32(Ircr_EQ)), - mkU64(0xffffffffffffffffULL), - mkU64(0ULL)); - put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); + assign(cond, binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp))); + } + put_vr_qw(v1, mkite(mkexpr(cond), + IRExpr_Const(IRConst_V128(0xffff)), + IRExpr_Const(IRConst_V128(0)))); + if (s390_vr_is_cs_set(m6)) { + IRTemp cc = newTemp(Ity_I64); + assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3))); + s390_cc_set(cc); } } else { - IRDirty* d; - IRTemp cc = newTemp(Ity_I64); - - s390x_vec_op_details_t details = { .serialized = 0ULL }; - details.op = S390_VEC_OP_VFCE; - details.v1 = v1; - details.v2 = v2; - details.v3 = v3; - details.m4 = m4; - details.m5 = m5; - details.m6 = m6; + IRTemp result = newTemp(Ity_V128); + + assign(result, binop(m4 == 2 ? cmp32 : cmp64, + get_vr_qw(v2), get_vr_qw(v3))); + put_vr_qw(v1, mkexpr(result)); + if (s390_vr_is_cs_set(m6)) { + IRTemp cc = newTemp(Ity_I64); + assign(cc, + mkite(binop(Iop_CmpEQ64, + binop(Iop_And64, + unop(Iop_V128to64, mkexpr(result)), + unop(Iop_V128HIto64, mkexpr(result))), + mkU64(-1ULL)), + mkU64(0), /* all comparison results are true */ + mkite(binop(Iop_CmpEQ64, + binop(Iop_Or64, + unop(Iop_V128to64, mkexpr(result)), + unop(Iop_V128HIto64, mkexpr(result))), + mkU64(0)), + mkU64(3), /* all false */ + mkU64(1)))); /* mixed true/false */ + s390_cc_set(cc); + } + } - d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", - &s390x_dirtyhelper_vec_op, - mkIRExprVec_2(IRExpr_GSPTR(), - mkU64(details.serialized))); + return mnem; +} - const UChar elementSize = isSingleElementOp ? 
sizeof(ULong) : sizeof(V128); - d->nFxState = 3; - vex_bzero(&d->fxState, sizeof(d->fxState)); - d->fxState[0].fx = Ifx_Read; - d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); - d->fxState[0].size = elementSize; - d->fxState[1].fx = Ifx_Read; - d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); - d->fxState[1].size = elementSize; - d->fxState[2].fx = Ifx_Write; - d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); - d->fxState[2].size = sizeof(V128); +static const HChar * +s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + return s390_irgen_VFCx(v1, v2, v3, m4, m5, m6, "vfce", Ircr_EQ, + False, Iop_CmpEQ32Fx4, Iop_CmpEQ64Fx2); +} - stmt(IRStmt_Dirty(d)); - s390_cc_set(cc); - } +static const HChar * +s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + /* Swap arguments and compare "low" instead. */ + return s390_irgen_VFCx(v1, v3, v2, m4, m5, m6, "vfch", Ircr_LT, + False, Iop_CmpLT32Fx4, Iop_CmpLT64Fx2); +} - return "vfce"; +static const HChar * +s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + /* Swap arguments and compare "low or equal" instead. 
*/ + return s390_irgen_VFCx(v1, v3, v2, m4, m5, m6, "vfche", Ircr_LT, + True, Iop_CmpLE32Fx4, Iop_CmpLE64Fx2); } static const HChar * -s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5) { - vassert(m4 == 3); + s390_insn_assert("vftci", + (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); - if (!s390_vr_is_cs_set(m6)) { - if (!isSingleElementOp) { - put_vr_qw(v1, binop(Iop_CmpLE64Fx2, get_vr_qw(v3), get_vr_qw(v2))); - } else { - IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), - get_vr(v3, Ity_F64, 0)); - IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, - mkU32(Ircr_GT)), - mkU64(0xffffffffffffffffULL), - mkU64(0ULL)); - put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); - } - } - else { - IRDirty* d; - IRTemp cc = newTemp(Ity_I64); - s390x_vec_op_details_t details = { .serialized = 0ULL }; - details.op = S390_VEC_OP_VFCH; - details.v1 = v1; - details.v2 = v2; - details.v3 = v3; - details.m4 = m4; - details.m5 = m5; - details.m6 = m6; + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); - d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", - &s390x_dirtyhelper_vec_op, - mkIRExprVec_2(IRExpr_GSPTR(), - mkU64(details.serialized))); + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFTCI; + details.v1 = v1; + details.v2 = v2; + details.i3 = i3; + details.m4 = m4; + details.m5 = m5; - const UChar elementSize = isSingleElementOp ? 
sizeof(ULong) : sizeof(V128); - d->nFxState = 3; - vex_bzero(&d->fxState, sizeof(d->fxState)); - d->fxState[0].fx = Ifx_Read; - d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); - d->fxState[0].size = elementSize; - d->fxState[1].fx = Ifx_Read; - d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); - d->fxState[1].size = elementSize; - d->fxState[2].fx = Ifx_Write; - d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); - d->fxState[2].size = sizeof(V128); + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); - stmt(IRStmt_Dirty(d)); - s390_cc_set(cc); - } + const UChar elementSize = isSingleElementOp ? + sizeofIRType(s390_vr_get_ftype(m4)) : sizeof(V128); + d->nFxState = 2; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = elementSize; + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); - return "vfch"; + return "vftci"; } static const HChar * -s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +s390_irgen_VFMIN(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) { - s390_insn_assert("vfche", m4 == 3); + s390_insn_assert("vfmin", + (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); - if (!s390_vr_is_cs_set(m6)) { - if (!isSingleElementOp) { - put_vr_qw(v1, binop(Iop_CmpLT64Fx2, get_vr_qw(v3), get_vr_qw(v2))); - } - else { - IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v3, Ity_F64, 0), - get_vr(v2, Ity_F64, 0)); - IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, - mkU32(Ircr_LT)), - mkU64(0xffffffffffffffffULL), - mkU64(0ULL)); 
- put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); - } - } - else { - IRDirty* d; - IRTemp cc = newTemp(Ity_I64); - - s390x_vec_op_details_t details = { .serialized = 0ULL }; - details.op = S390_VEC_OP_VFCHE; - details.v1 = v1; - details.v2 = v2; - details.v3 = v3; - details.m4 = m4; - details.m5 = m5; - details.m6 = m6; + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); - d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", - &s390x_dirtyhelper_vec_op, - mkIRExprVec_2(IRExpr_GSPTR(), - mkU64(details.serialized))); + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFMIN; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + details.m6 = m6; - const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); - d->nFxState = 3; - vex_bzero(&d->fxState, sizeof(d->fxState)); - d->fxState[0].fx = Ifx_Read; - d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); - d->fxState[0].size = elementSize; - d->fxState[1].fx = Ifx_Read; - d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); - d->fxState[1].size = elementSize; - d->fxState[2].fx = Ifx_Write; - d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); - d->fxState[2].size = sizeof(V128); + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); - stmt(IRStmt_Dirty(d)); - s390_cc_set(cc); - } + const UChar elementSize = isSingleElementOp ? 
+ sizeofIRType(s390_vr_get_ftype(m4)) : sizeof(V128); + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = elementSize; + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = elementSize; + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); - return "vfche"; + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + return "vfmin"; } static const HChar * -s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5) +s390_irgen_VFMAX(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) { - s390_insn_assert("vftci", m4 == 3); + s390_insn_assert("vfmax", + (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); - IRDirty* d; IRTemp cc = newTemp(Ity_I64); s390x_vec_op_details_t details = { .serialized = 0ULL }; - details.op = S390_VEC_OP_VFTCI; + details.op = S390_VEC_OP_VFMAX; details.v1 = v1; details.v2 = v2; - details.i3 = i3; + details.v3 = v3; details.m4 = m4; details.m5 = m5; + details.m6 = m6; d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", &s390x_dirtyhelper_vec_op, mkIRExprVec_2(IRExpr_GSPTR(), mkU64(details.serialized))); - const UChar ... [truncated message content] |