|
From: Eyal S. <eya...@gm...> - 2021-03-04 21:58:00
|
From: eyal0 <109...@us...> This fixes https://bugs.kde.org/show_bug.cgi?id=432801 To test: ```sh make && perl tests/vg_regtest memcheck/tests/x86/pcmpgtd ``` --- .gitignore | 1 + memcheck/mc_translate.c | 101 +++++++++++++++++- memcheck/tests/amd64/Makefile.am | 6 +- memcheck/tests/amd64/pcmpgtd.c | 134 ++++++++++++++++++++++++ memcheck/tests/amd64/pcmpgtd.stderr.exp | 44 ++++++++ memcheck/tests/amd64/pcmpgtd.vgtest | 2 + 6 files changed, 285 insertions(+), 3 deletions(-) create mode 100644 memcheck/tests/amd64/pcmpgtd.c create mode 100644 memcheck/tests/amd64/pcmpgtd.stderr.exp create mode 100644 memcheck/tests/amd64/pcmpgtd.vgtest diff --git a/.gitignore b/.gitignore index b9fca3de3..fd1cf9ae5 100644 --- a/.gitignore +++ b/.gitignore @@ -974,6 +974,7 @@ /memcheck/tests/amd64/insn-bsfl /memcheck/tests/amd64/insn-pmovmskb /memcheck/tests/amd64/insn-pcmpistri +/memcheck/tests/amd64/pcmpgtd /memcheck/tests/amd64/sh-mem-vec128 /memcheck/tests/amd64/sh-mem-vec256 /memcheck/tests/amd64/xsave-avx diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c index 516988bdd..51e94427e 100644 --- a/memcheck/mc_translate.c +++ b/memcheck/mc_translate.c @@ -1287,6 +1287,101 @@ static IRAtom* expensiveCmpEQorNE ( MCEnv* mce, return final_cast; } +/* Check if we can know, despite the uncertain bits, that xx is greater than yy. + Notice that it's xx > yy and not the other way around. This is Intel syntax + with destination first. It will appear reversed in gdb disassembly (AT&T + syntax). 
+ */ +static IRAtom* expensiveCmpGT ( MCEnv* mce, + unsigned int word_size, + Bool is_signed, + unsigned int count, + IRAtom* vxx, IRAtom* vyy, + IRAtom* xx, IRAtom* yy ) +{ + IROp opAND, opOR, opXOR, opNOT, opEQ, opSHL, opGT; + IRType ty; + + tl_assert(isShadowAtom(mce,vxx)); + tl_assert(isShadowAtom(mce,vyy)); + tl_assert(isOriginalAtom(mce,xx)); + tl_assert(isOriginalAtom(mce,yy)); + tl_assert(sameKindedAtoms(vxx,xx)); + tl_assert(sameKindedAtoms(vyy,yy)); + + switch (word_size * count) { + case 128: + ty = Ity_V128; + opAND = Iop_AndV128; + opOR = Iop_OrV128; + opXOR = Iop_XorV128; + opNOT = Iop_NotV128; + break; + default: + VG_(tool_panic)("expensiveCmpGT"); + } + if (word_size == 32 && count == 4) { + opEQ = Iop_CmpEQ32x4; + opSHL = Iop_ShlN32x4; + if (is_signed) { + opGT = Iop_CmpGT32Sx4; + } else { + opGT = Iop_CmpGT32Ux4; + } + } else { + VG_(tool_panic)("expensiveCmpGT"); + } + IRAtom *MSBs; + if (is_signed) { + // For unsigned it's easy to make the min and max: Just set the unknown + // bits all to 0s or 1s. For signed it's harder because having a 1 in the + // MSB makes a number smaller, not larger! We can work around this by + // flipping the MSB before and after computing the min and max values. + IRAtom *const0 = mkV128(0); + IRAtom *all_ones = assignNew('V', mce, ty, binop(opEQ, const0, const0)); + MSBs = assignNew('V', mce, ty, binop(opSHL, all_ones, mkU8(31))); + xx = assignNew('V', mce, ty, binop(opXOR, xx, MSBs)); + yy = assignNew('V', mce, ty, binop(opXOR, yy, MSBs)); + // From here on out, we're dealing with MSB-flipped integers. + } + // We can combine xx and vxx to create two values: the largest that xx could + // possibly be and the smallest that xx could possibly be. Likewise, we can + // do the same for yy. We'll call those max_xx and min_xx and max_yy and + // min_yy. 
+ IRAtom *not_vxx = assignNew('V', mce, ty, unop(opNOT, vxx)); + IRAtom *not_vyy = assignNew('V', mce, ty, unop(opNOT, vyy)); + IRAtom *max_xx = assignNew('V', mce, ty, binop(opOR, xx, vxx)); + IRAtom *min_xx = assignNew('V', mce, ty, binop(opAND, xx, not_vxx)); + IRAtom *max_yy = assignNew('V', mce, ty, binop(opOR, yy, vyy)); + IRAtom *min_yy = assignNew('V', mce, ty, binop(opAND, yy, not_vyy)); + if (is_signed) { + // Unflip the MSBs. + max_xx = assignNew('V', mce, ty, binop(opXOR, max_xx, MSBs)); + min_xx = assignNew('V', mce, ty, binop(opXOR, min_xx, MSBs)); + max_yy = assignNew('V', mce, ty, binop(opXOR, max_yy, MSBs)); + min_yy = assignNew('V', mce, ty, binop(opXOR, min_yy, MSBs)); + } + IRAtom *min_xx_gt_max_yy = assignNew('V', mce, ty, binop(opGT, min_xx, max_yy)); + IRAtom *max_xx_gt_min_yy = assignNew('V', mce, ty, binop(opGT, max_xx, min_yy)); + // If min_xx is greater than max_yy then xx is surely greater than yy so we know + // our answer for sure. If max_xx is not greater than min_yy then xx can't + // possibly be greater than yy so again we know the answer for sure. For all + // other cases, we can't know. + // + // So the result is defined if: + // + // min_xx_gt_max_yy | ~max_xx_gt_min_yy + // + // Because defined in vbits is 0s and not 1s, we need to invert that: + // + // ~(min_xx_gt_max_yy | ~max_xx_gt_min_yy) + // + // We can use DeMorgan's Law to simplify the above: + // + // ~min_xx_gt_max_yy & max_xx_gt_min_yy + IRAtom *not_min_xx_gt_max_yy = assignNew('V', mce, ty, unop(opNOT, min_xx_gt_max_yy)); + return assignNew('V', mce, ty, binop(opAND, not_min_xx_gt_max_yy, max_xx_gt_min_yy)); +} /* --------- Semi-accurate interpretation of CmpORD. 
--------- */ @@ -3947,9 +4042,13 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_PwExtUSMulQAdd8x16: return binary16Ix8(mce, vatom1, vatom2); - case Iop_Sub32x4: case Iop_CmpGT32Sx4: + return expensiveCmpGT(mce, 32, True /* signed */, + 4, vatom1, vatom2, atom1, atom2); case Iop_CmpGT32Ux4: + return expensiveCmpGT(mce, 32, False /* unsigned */, + 4, vatom1, vatom2, atom1, atom2); + case Iop_Sub32x4: case Iop_CmpEQ32x4: case Iop_QAdd32Sx4: case Iop_QAdd32Ux4: diff --git a/memcheck/tests/amd64/Makefile.am b/memcheck/tests/amd64/Makefile.am index da15cf797..e81ea74da 100644 --- a/memcheck/tests/amd64/Makefile.am +++ b/memcheck/tests/amd64/Makefile.am @@ -18,6 +18,7 @@ EXTRA_DIST = \ insn-pcmpistri.vgtest insn-pcmpistri.stdout.exp insn-pcmpistri.stderr.exp \ insn-pmovmskb.vgtest insn-pmovmskb.stdout.exp insn-pmovmskb.stderr.exp \ more_x87_fp.stderr.exp more_x87_fp.stdout.exp more_x87_fp.vgtest \ + pcmpgtd.stderr.exp pcmpgtd.vgtest \ sh-mem-vec128-plo-no.vgtest \ sh-mem-vec128-plo-no.stderr.exp \ sh-mem-vec128-plo-no.stdout.exp \ @@ -43,6 +44,7 @@ check_PROGRAMS = \ fxsave-amd64 \ insn-bsfl \ insn-pmovmskb \ + pcmpgtd \ sh-mem-vec128 \ sse_memory \ xor-undef-amd64 @@ -55,8 +57,8 @@ endif # clang 3.5.0 barfs about -mfancy-math-387 if !COMPILER_IS_CLANG check_PROGRAMS += \ - more_x87_fp \ - shr_edx + more_x87_fp \ + shr_edx endif AM_CFLAGS += @FLAG_M64@ diff --git a/memcheck/tests/amd64/pcmpgtd.c b/memcheck/tests/amd64/pcmpgtd.c new file mode 100644 index 000000000..891ebad35 --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.c @@ -0,0 +1,134 @@ +/* https://bugs.kde.org/show_bug.cgi?id=432801 */ + +#include <signal.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <ctype.h> + +#include "../../memcheck.h" + +// This function fails when compiled on clang version 10 or greater with -O2. 
+// It's unused by the test but left here as a copy of the error in the bug +// report https://bugs.kde.org/show_bug.cgi?id=432801 +void standalone() { + struct sigaction act; + if (sigaction(SIGTERM, 0, &act) == 1) { + return; + } + if (sigaction(SIGTERM, 0, &act) == 1) { + return; + } + + char pattern[] = "\x1\x2\x3\x4\x5\x6\x7\x8\x9"; + const unsigned long plen = strlen(pattern); + pattern[1] = 0; + size_t hp=0; + for (size_t i = 0; i < plen; ++i) + hp += pattern[i]; + volatile int j = 0; + if (j == hp % 10) { + j++; + } + printf("%ld\n", hp); +} + +typedef unsigned long ULong; + +typedef struct { + ULong w64[2]; /* Note: little-endian */ +} V128; + +static int cmpGT32Sx4(V128 x, V128 y) +{ + int result; + __asm__("movups %1,%%xmm6\n" + "\tmovups %2,%%xmm7\n" + // order swapped for AT&T style which has destination second. + "\tpcmpgtd %%xmm7,%%xmm6\n" + "\tmovd %%xmm6, %0" + : "=r" (result) : "m" (x), "m" (y) : "xmm6"); + return result; +} + +/* Set the V bits on the data at "addr". Note the convention: A zero + bit means "defined"; 1 means "undefined". */ +static void set_vbits(V128 *addr, V128 vbits) +{ + int i; + for (i=0 ; i<2 ; ++i) { + (void)VALGRIND_SET_VBITS(&addr->w64[i], &vbits.w64[i], sizeof(vbits.w64[i])); + } +} + +/* Use a value that we know is invalid. */ +static void use(char *x, char* y, int invalid) +{ + /* Convince GCC it does not know what is in "invalid" so it cannot + possibly optimize away the conditional branch below. */ + __asm__ ("" : "=r" (invalid) : "0" (invalid)); + + /* Create a conditional branch on which our output depends, so that + memcheck cannot possibly optimize it away, either. */ + if (invalid) { + fprintf(stderr, "%s > %s == true\n", x, y); + } else { + fprintf(stderr, "%s > %s == false\n", x, y); + } +} + +// Convert a string like "123XXX45" to a value and vbits. 
+V128 string_to_v128(char *s) { + ULong x = 0; + ULong vx = 0; + + for (; *s; s++) { + int lowered_c = tolower(*s); + x <<= 4; + vx <<= 4; + if (lowered_c == 'x') { + vx |= 0xf; + } else if (isdigit(lowered_c)) { + x |= lowered_c - '0'; + } else if (lowered_c >= 'a' && lowered_c <= 'f') { + x |= lowered_c - 'a' + 0xa; + } else { + fprintf(stderr, "Not a hex digit: %c\n", *s); + exit(1); + } + } + + V128 vx128 = { { vx, 0 } }; + V128 x128 = { { x, 0 } }; + set_vbits(&x128, vx128); + return x128; +} + +static void doit(char *x, char *y) { + int result = cmpGT32Sx4(string_to_v128(x), string_to_v128(y)); + use(x, y, result); +} + +int main() { + // completely undefined + doit("xxxxxxxx", "xxxxxxxx"); + + // completely defined + doit("00000000", "00000000"); + doit("00000000", "f0000000"); + doit("f0000000", "00000000"); + + doit("00000000", "fxxxxxxx"); // defined: 0 > all negatives + doit("0xxxxxxx", "fxxxxxxx"); // defined: non-negatives > all negatives + doit("xxxxxxx0", "f0000000"); // undefined + doit("xxxxxxx1", "80000000"); // defined: ends with 1 > MIN_INT + doit("5xxxxxxx", "6xxxxxxx"); // defined + doit("8xxxxxxx", "9xxxxxxx"); // defined + + doit("1234567x", "12345678"); // undefined + doit("1234567x", "1234567f"); // defined: x can't be more than f + doit("1234567x", "1234567e"); // undefined: x can be more than e + + return 0; +} diff --git a/memcheck/tests/amd64/pcmpgtd.stderr.exp b/memcheck/tests/amd64/pcmpgtd.stderr.exp new file mode 100644 index 000000000..bb248c60c --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.stderr.exp @@ -0,0 +1,44 @@ + +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:115) + +xxxxxxxx > xxxxxxxx == false +00000000 > 00000000 == false +00000000 > f0000000 == true +f0000000 > 00000000 == false +00000000 > fxxxxxxx == true +0xxxxxxx > fxxxxxxx == true +Conditional jump or move depends on uninitialised value(s) + at 
0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:124) + +xxxxxxx0 > f0000000 == true +xxxxxxx1 > 80000000 == true +5xxxxxxx > 6xxxxxxx == false +8xxxxxxx > 9xxxxxxx == false +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:129) + +1234567x > 12345678 == false +1234567x > 1234567f == false +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:131) + +1234567x > 1234567e == false + +HEAP SUMMARY: + in use at exit: 0 bytes in 0 blocks + total heap usage: 0 allocs, 0 frees, 0 bytes allocated + +For a detailed leak analysis, rerun with: --leak-check=full + +Use --track-origins=yes to see where uninitialised values come from +For lists of detected and suppressed errors, rerun with: -s +ERROR SUMMARY: 4 errors from 4 contexts (suppressed: 0 from 0) diff --git a/memcheck/tests/amd64/pcmpgtd.vgtest b/memcheck/tests/amd64/pcmpgtd.vgtest new file mode 100644 index 000000000..08fabeb76 --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.vgtest @@ -0,0 +1,2 @@ +prog: pcmpgtd -q +prereq: test -e pcmpgtd -- 2.20.1 |