From: Florian K. <fk...@so...> - 2025-07-31 20:50:17
|
https://sourceware.org/cgit/valgrind/commit/?id=96dfb3b4753a93db3fa0eed1c8bf73c568d3984a commit 96dfb3b4753a93db3fa0eed1c8bf73c568d3984a Author: Florian Krohm <fl...@ei...> Date: Thu Jul 31 20:45:29 2025 +0000 amd64 specific changes for BZ 507033 Rework code to use Iop_ClzNat64 instead of the deprecated Iop_Clz64. Likewise for Iop_Ctz64. Part of fixing https://bugs.kde.org/show_bug.cgi?id=507033 Diff: --- VEX/priv/guest_amd64_toIR.c | 39 +++++++++++++++++++-------------------- VEX/priv/host_amd64_isel.c | 4 ++-- memcheck/tests/vbit-test/irops.c | 8 ++++---- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c index 57a8a434b8..c98b55c063 100644 --- a/VEX/priv/guest_amd64_toIR.c +++ b/VEX/priv/guest_amd64_toIR.c @@ -5075,14 +5075,14 @@ static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) binop(Iop_Shl64, mkexpr(src64), mkU8(64 - 8 * sizeofIRType(ty)))); - // Clz64 has undefined semantics when its input is zero, so - // special-case around that. + /* Guard against 0 input value. Use ClzNat64 operator for all other + values */ IRTemp res64 = newTemp(Ity_I64); assign(res64, IRExpr_ITE( binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)), mkU64(8 * sizeofIRType(ty)), - unop(Iop_Clz64, mkexpr(src64x)) + unop(Iop_ClzNat64, mkexpr(src64x)) )); IRTemp res = newTemp(ty); @@ -5103,14 +5103,14 @@ static IRTemp gen_TZCNT ( IRType ty, IRTemp src ) IRTemp src64 = newTemp(Ity_I64); assign(src64, widenUto64( mkexpr(src) )); - // Ctz64 has undefined semantics when its input is zero, so - // special-case around that. + /* Guard against 0 input value. Use CtzNat64 operator for all other + values */ IRTemp res64 = newTemp(Ity_I64); assign(res64, IRExpr_ITE( binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)), mkU64(8 * sizeofIRType(ty)), - unop(Iop_Ctz64, mkexpr(src64)) + unop(Iop_CtzNat64, mkexpr(src64)) )); IRTemp res = newTemp(ty); @@ -8421,30 +8421,28 @@ ULong dis_bs_E_G ( const VexAbiInfo* vbi, elimination of previous stores to this field work better. */ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); - /* Result: iff source value is zero, we can't use - Iop_Clz64/Iop_Ctz64 as they have no defined result in that case. - But anyway, amd64 semantics say the result is undefined in - such situations. Hence handle the zero case specially. */ + /* amd64 semantics say the result is undefined iff source value is + zero. Hence handle the zero case specially. */ /* Bleh. What we compute: bsf64: if src == 0 then {dst is unchanged} - else Ctz64(src) + else CtzNat64(src) bsr64: if src == 0 then {dst is unchanged} - else 63 - Clz64(src) + else 63 - ClzNat64(src) bsf32: if src == 0 then {dst is unchanged} - else Ctz64(32Uto64(src)) + else CtzNat64(32Uto64(src)) bsr32: if src == 0 then {dst is unchanged} - else 63 - Clz64(32Uto64(src)) + else 63 - ClzNat64(32Uto64(src)) bsf16: if src == 0 then {dst is unchanged} - else Ctz64(32Uto64(16Uto32(src))) + else CtzNat64(32Uto64(16Uto32(src))) bsr16: if src == 0 then {dst is unchanged} - else 63 - Clz64(32Uto64(16Uto32(src))) + else 63 - ClzNat64(32Uto64(16Uto32(src))) */ /* The main computation, guarding against zero. */ @@ -8452,10 +8450,10 @@ ULong dis_bs_E_G ( const VexAbiInfo* vbi, IRExpr_ITE( mkexpr(srcB), /* src != 0 */ - fwds ? unop(Iop_Ctz64, mkexpr(src64)) + fwds ? unop(Iop_CtzNat64, mkexpr(src64)) : binop(Iop_Sub64, mkU64(63), - unop(Iop_Clz64, mkexpr(src64))), + unop(Iop_ClzNat64, mkexpr(src64))), /* src == 0 -- leave dst unchanged */ widenUto64( getIRegG( sz, pfx, modrm ) ) ) @@ -18606,8 +18604,9 @@ static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx, static IRExpr* math_CTZ32(IRExpr *exp) { - /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */ - return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp))); + /* Iop_CtzNat32 isn't implemented by the amd64 back end, so use + Iop_CtzNat64. */ + return unop(Iop_64to32, unop(Iop_CtzNat64, unop(Iop_32Uto64, exp))); } static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR, diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c index 21d20c77f0..f0e21ab983 100644 --- a/VEX/priv/host_amd64_isel.c +++ b/VEX/priv/host_amd64_isel.c @@ -1628,14 +1628,14 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e ) addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst)); return dst; } - case Iop_Ctz64: { + case Iop_CtzNat64: { /* Count trailing zeroes, implemented by amd64 'bsfq' */ HReg dst = newVRegI(env); HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); addInstr(env, AMD64Instr_Bsfr64(True,src,dst)); return dst; } - case Iop_Clz64: { + case Iop_ClzNat64: { /* Count leading zeroes. Do 'bsrq' to establish the index of the highest set bit, and subtract that value from 63. */ diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c index 33c78fef1a..1794191a7f 100644 --- a/memcheck/tests/vbit-test/irops.c +++ b/memcheck/tests/vbit-test/irops.c @@ -108,13 +108,13 @@ static irop_t irops[] = { { DEFOP(Iop_MullU16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, { DEFOP(Iop_MullU32, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 1 }, // mips asserts { DEFOP(Iop_MullU64, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc32, mips assert - { DEFOP(Iop_Clz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc32 asserts + { DEFOP(Iop_Clz64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc32 asserts { DEFOP(Iop_Clz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 }, - { DEFOP(Iop_Ctz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, + { DEFOP(Iop_Ctz64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, { DEFOP(Iop_Ctz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, - { DEFOP(Iop_ClzNat64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts + { DEFOP(Iop_ClzNat64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts { DEFOP(Iop_ClzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 }, - { DEFOP(Iop_CtzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, + { DEFOP(Iop_CtzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, { DEFOP(Iop_CtzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 1, .mips32 =0, .mips64 = 0 }, { DEFOP(Iop_PopCount64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, { DEFOP(Iop_PopCount32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 }, |