From: eyal0 <109...@us...>
This fixes https://bugs.kde.org/show_bug.cgi?id=432801
To test:
```sh
make && perl tests/vg_regtest memcheck/tests/amd64/pcmpgtd
```
---
.gitignore | 1 +
memcheck/mc_translate.c | 101 +++++++++++++++++-
memcheck/tests/amd64/Makefile.am | 6 +-
memcheck/tests/amd64/pcmpgtd.c | 134 ++++++++++++++++++++++++
memcheck/tests/amd64/pcmpgtd.stderr.exp | 44 ++++++++
memcheck/tests/amd64/pcmpgtd.vgtest | 2 +
6 files changed, 285 insertions(+), 3 deletions(-)
create mode 100644 memcheck/tests/amd64/pcmpgtd.c
create mode 100644 memcheck/tests/amd64/pcmpgtd.stderr.exp
create mode 100644 memcheck/tests/amd64/pcmpgtd.vgtest
diff --git a/.gitignore b/.gitignore
index b9fca3de3..fd1cf9ae5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -974,6 +974,7 @@
/memcheck/tests/amd64/insn-bsfl
/memcheck/tests/amd64/insn-pmovmskb
/memcheck/tests/amd64/insn-pcmpistri
+/memcheck/tests/amd64/pcmpgtd
/memcheck/tests/amd64/sh-mem-vec128
/memcheck/tests/amd64/sh-mem-vec256
/memcheck/tests/amd64/xsave-avx
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index 516988bdd..51e94427e 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -1287,6 +1287,101 @@ static IRAtom* expensiveCmpEQorNE ( MCEnv* mce,
return final_cast;
}
+/* Check if we can know, despite the uncertain bits, that xx is greater than yy.
+ Notice that it's xx > yy and not the other way around. This is Intel syntax
+ with destination first. It will appear reversed in gdb disassembly (AT&T
+ syntax).
+ */
+static IRAtom* expensiveCmpGT ( MCEnv* mce,
+ unsigned int word_size,
+ Bool is_signed,
+ unsigned int count,
+ IRAtom* vxx, IRAtom* vyy,
+ IRAtom* xx, IRAtom* yy )
+{
+ IROp opAND, opOR, opXOR, opNOT, opEQ, opSHL, opGT;
+ IRType ty;
+
+ tl_assert(isShadowAtom(mce,vxx));
+ tl_assert(isShadowAtom(mce,vyy));
+ tl_assert(isOriginalAtom(mce,xx));
+ tl_assert(isOriginalAtom(mce,yy));
+ tl_assert(sameKindedAtoms(vxx,xx));
+ tl_assert(sameKindedAtoms(vyy,yy));
+
+ switch (word_size * count) {
+ case 128:
+ ty = Ity_V128;
+ opAND = Iop_AndV128;
+ opOR = Iop_OrV128;
+ opXOR = Iop_XorV128;
+ opNOT = Iop_NotV128;
+ break;
+ default:
+ VG_(tool_panic)("expensiveCmpGT");
+ }
+ if (word_size == 32 && count == 4) {
+ opEQ = Iop_CmpEQ32x4;
+ opSHL = Iop_ShlN32x4;
+ if (is_signed) {
+ opGT = Iop_CmpGT32Sx4;
+ } else {
+ opGT = Iop_CmpGT32Ux4;
+ }
+ } else {
+ VG_(tool_panic)("expensiveCmpGT");
+ }
+ IRAtom *MSBs;
+ if (is_signed) {
+ // For unsigned it's easy to make the min and max: Just set the unknown
+ // bits all to 0s or 1s. For signed it's harder because having a 1 in the
+ // MSB makes a number smaller, not larger! We can work around this by
+ // flipping the MSB before and after computing the min and max values.
+ IRAtom *const0 = mkV128(0);
+ IRAtom *all_ones = assignNew('V', mce, ty, binop(opEQ, const0, const0));
+ MSBs = assignNew('V', mce, ty, binop(opSHL, all_ones, mkU8(31)));
+ xx = assignNew('V', mce, ty, binop(opXOR, xx, MSBs));
+ yy = assignNew('V', mce, ty, binop(opXOR, yy, MSBs));
+ // From here on out, we're dealing with MSB-flipped integers.
+ }
+ // We can combine xx and vxx to create two values: the largest that xx could
+ // possibly be and the smallest that xx could possibly be. Likewise, we can
+ // do the same for yy. We'll call those max_xx and min_xx and max_yy and
+ // min_yy.
+ IRAtom *not_vxx = assignNew('V', mce, ty, unop(opNOT, vxx));
+ IRAtom *not_vyy = assignNew('V', mce, ty, unop(opNOT, vyy));
+ IRAtom *max_xx = assignNew('V', mce, ty, binop(opOR, xx, vxx));
+ IRAtom *min_xx = assignNew('V', mce, ty, binop(opAND, xx, not_vxx));
+ IRAtom *max_yy = assignNew('V', mce, ty, binop(opOR, yy, vyy));
+ IRAtom *min_yy = assignNew('V', mce, ty, binop(opAND, yy, not_vyy));
+ if (is_signed) {
+ // Unflip the MSBs.
+ max_xx = assignNew('V', mce, ty, binop(opXOR, max_xx, MSBs));
+ min_xx = assignNew('V', mce, ty, binop(opXOR, min_xx, MSBs));
+ max_yy = assignNew('V', mce, ty, binop(opXOR, max_yy, MSBs));
+ min_yy = assignNew('V', mce, ty, binop(opXOR, min_yy, MSBs));
+ }
+ IRAtom *min_xx_gt_max_yy = assignNew('V', mce, ty, binop(opGT, min_xx, max_yy));
+ IRAtom *max_xx_gt_min_yy = assignNew('V', mce, ty, binop(opGT, max_xx, min_yy));
+ // If min_xx is greater than max_yy then xx is surely greater than yy so we know
+ // our answer for sure. If max_xx is not greater than min_yy then xx can't
+ possibly be greater than yy so again we know the answer for sure. For all
+ // other cases, we can't know.
+ //
+ // So the result is defined if:
+ //
+ // min_xx_gt_max_yy | ~max_xx_gt_min_yy
+ //
+ // Because defined in vbits is 0s and not 1s, we need to invert that:
+ //
+ // ~(min_xx_gt_max_yy | ~max_xx_gt_min_yy)
+ //
+ // We can use DeMorgan's Law to simplify the above:
+ //
+ // ~min_xx_gt_max_yy & max_xx_gt_min_yy
+ IRAtom *not_min_xx_gt_max_yy = assignNew('V', mce, ty, unop(opNOT, min_xx_gt_max_yy));
+ return assignNew('V', mce, ty, binop(opAND, not_min_xx_gt_max_yy, max_xx_gt_min_yy));
+}
/* --------- Semi-accurate interpretation of CmpORD. --------- */
@@ -3947,9 +4042,13 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce,
case Iop_PwExtUSMulQAdd8x16:
return binary16Ix8(mce, vatom1, vatom2);
- case Iop_Sub32x4:
case Iop_CmpGT32Sx4:
+ return expensiveCmpGT(mce, 32, True /* signed */,
+ 4, vatom1, vatom2, atom1, atom2);
case Iop_CmpGT32Ux4:
+ return expensiveCmpGT(mce, 32, False /* unsigned */,
+ 4, vatom1, vatom2, atom1, atom2);
+ case Iop_Sub32x4:
case Iop_CmpEQ32x4:
case Iop_QAdd32Sx4:
case Iop_QAdd32Ux4:
diff --git a/memcheck/tests/amd64/Makefile.am b/memcheck/tests/amd64/Makefile.am
index da15cf797..e81ea74da 100644
--- a/memcheck/tests/amd64/Makefile.am
+++ b/memcheck/tests/amd64/Makefile.am
@@ -18,6 +18,7 @@ EXTRA_DIST = \
insn-pcmpistri.vgtest insn-pcmpistri.stdout.exp insn-pcmpistri.stderr.exp \
insn-pmovmskb.vgtest insn-pmovmskb.stdout.exp insn-pmovmskb.stderr.exp \
more_x87_fp.stderr.exp more_x87_fp.stdout.exp more_x87_fp.vgtest \
+ pcmpgtd.stderr.exp pcmpgtd.vgtest \
sh-mem-vec128-plo-no.vgtest \
sh-mem-vec128-plo-no.stderr.exp \
sh-mem-vec128-plo-no.stdout.exp \
@@ -43,6 +44,7 @@ check_PROGRAMS = \
fxsave-amd64 \
insn-bsfl \
insn-pmovmskb \
+ pcmpgtd \
sh-mem-vec128 \
sse_memory \
xor-undef-amd64
@@ -55,8 +57,8 @@ endif
# clang 3.5.0 barfs about -mfancy-math-387
if !COMPILER_IS_CLANG
check_PROGRAMS += \
- more_x87_fp \
- shr_edx
+ more_x87_fp \
+ shr_edx
endif
AM_CFLAGS += @FLAG_M64@
diff --git a/memcheck/tests/amd64/pcmpgtd.c b/memcheck/tests/amd64/pcmpgtd.c
new file mode 100644
index 000000000..891ebad35
--- /dev/null
+++ b/memcheck/tests/amd64/pcmpgtd.c
@@ -0,0 +1,134 @@
+/* https://bugs.kde.org/show_bug.cgi?id=432801 */
+
+#include <signal.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <ctype.h>
+
+#include "../../memcheck.h"
+
+// This function fails when compiled on clang version 10 or greater with -O2.
+// It's unused by the test but left here as a copy of the error in the bug
+// report https://bugs.kde.org/show_bug.cgi?id=432801
+void standalone() {
+ struct sigaction act;
+ if (sigaction(SIGTERM, 0, &act) == 1) {
+ return;
+ }
+ if (sigaction(SIGTERM, 0, &act) == 1) {
+ return;
+ }
+
+ char pattern[] = "\x1\x2\x3\x4\x5\x6\x7\x8\x9";
+ const unsigned long plen = strlen(pattern);
+ pattern[1] = 0;
+ size_t hp=0;
+ for (size_t i = 0; i < plen; ++i)
+ hp += pattern[i];
+ volatile int j = 0;
+ if (j == hp % 10) {
+ j++;
+ }
+ printf("%ld\n", hp);
+}
+
+typedef unsigned long ULong;
+
+typedef struct {
+ ULong w64[2]; /* Note: little-endian */
+} V128;
+
+static int cmpGT32Sx4(V128 x, V128 y)
+{
+ int result;
+ __asm__("movups %1,%%xmm6\n"
+ "\tmovups %2,%%xmm7\n"
+ // order swapped for AT&T style which has destination second.
+ "\tpcmpgtd %%xmm7,%%xmm6\n"
+ "\tmovd %%xmm6, %0"
+ : "=r" (result) : "m" (x), "m" (y) : "xmm6");
+ return result;
+}
+
+/* Set the V bits on the data at "addr". Note the convention: A zero
+ bit means "defined"; 1 means "undefined". */
+static void set_vbits(V128 *addr, V128 vbits)
+{
+ int i;
+ for (i=0 ; i<2 ; ++i) {
+ (void)VALGRIND_SET_VBITS(&addr->w64[i], &vbits.w64[i], sizeof(vbits.w64[i]));
+ }
+}
+
+/* Use a value that we know is invalid. */
+static void use(char *x, char* y, int invalid)
+{
+ /* Convince GCC it does not know what is in "invalid" so it cannot
+ possibly optimize away the conditional branch below. */
+ __asm__ ("" : "=r" (invalid) : "0" (invalid));
+
+ /* Create a conditional branch on which our output depends, so that
+ memcheck cannot possibly optimize it away, either. */
+ if (invalid) {
+ fprintf(stderr, "%s > %s == true\n", x, y);
+ } else {
+ fprintf(stderr, "%s > %s == false\n", x, y);
+ }
+}
+
+// Convert a string like "123XXX45" to a value and vbits.
+V128 string_to_v128(char *s) {
+ ULong x = 0;
+ ULong vx = 0;
+
+ for (; *s; s++) {
+ int lowered_c = tolower(*s);
+ x <<= 4;
+ vx <<= 4;
+ if (lowered_c == 'x') {
+ vx |= 0xf;
+ } else if (isdigit(lowered_c)) {
+ x |= lowered_c - '0';
+ } else if (lowered_c >= 'a' && lowered_c <= 'f') {
+ x |= lowered_c - 'a' + 0xa;
+ } else {
+ fprintf(stderr, "Not a hex digit: %c\n", *s);
+ exit(1);
+ }
+ }
+
+ V128 vx128 = { { vx, 0 } };
+ V128 x128 = { { x, 0 } };
+ set_vbits(&x128, vx128);
+ return x128;
+}
+
+static void doit(char *x, char *y) {
+ int result = cmpGT32Sx4(string_to_v128(x), string_to_v128(y));
+ use(x, y, result);
+}
+
+int main() {
+ // completely undefined
+ doit("xxxxxxxx", "xxxxxxxx");
+
+ // completely defined
+ doit("00000000", "00000000");
+ doit("00000000", "f0000000");
+ doit("f0000000", "00000000");
+
+ doit("00000000", "fxxxxxxx"); // defined: 0 > all negatives
+ doit("0xxxxxxx", "fxxxxxxx"); // defined: non-negatives > all negatives
+ doit("xxxxxxx0", "f0000000"); // undefined
+ doit("xxxxxxx1", "80000000"); // defined: ends with 1 > MIN_INT
+ doit("5xxxxxxx", "6xxxxxxx"); // defined
+ doit("8xxxxxxx", "9xxxxxxx"); // defined
+
+ doit("1234567x", "12345678"); // undefined
+ doit("1234567x", "1234567f"); // defined: x can't be more than f
+ doit("1234567x", "1234567e"); // undefined: x can be more than e
+
+ return 0;
+}
diff --git a/memcheck/tests/amd64/pcmpgtd.stderr.exp b/memcheck/tests/amd64/pcmpgtd.stderr.exp
new file mode 100644
index 000000000..bb248c60c
--- /dev/null
+++ b/memcheck/tests/amd64/pcmpgtd.stderr.exp
@@ -0,0 +1,44 @@
+
+Conditional jump or move depends on uninitialised value(s)
+ at 0x........: use (pcmpgtd.c:74)
+ by 0x........: doit (pcmpgtd.c:110)
+ by 0x........: main (pcmpgtd.c:115)
+
+xxxxxxxx > xxxxxxxx == false
+00000000 > 00000000 == false
+00000000 > f0000000 == true
+f0000000 > 00000000 == false
+00000000 > fxxxxxxx == true
+0xxxxxxx > fxxxxxxx == true
+Conditional jump or move depends on uninitialised value(s)
+ at 0x........: use (pcmpgtd.c:74)
+ by 0x........: doit (pcmpgtd.c:110)
+ by 0x........: main (pcmpgtd.c:124)
+
+xxxxxxx0 > f0000000 == true
+xxxxxxx1 > 80000000 == true
+5xxxxxxx > 6xxxxxxx == false
+8xxxxxxx > 9xxxxxxx == false
+Conditional jump or move depends on uninitialised value(s)
+ at 0x........: use (pcmpgtd.c:74)
+ by 0x........: doit (pcmpgtd.c:110)
+ by 0x........: main (pcmpgtd.c:129)
+
+1234567x > 12345678 == false
+1234567x > 1234567f == false
+Conditional jump or move depends on uninitialised value(s)
+ at 0x........: use (pcmpgtd.c:74)
+ by 0x........: doit (pcmpgtd.c:110)
+ by 0x........: main (pcmpgtd.c:131)
+
+1234567x > 1234567e == false
+
+HEAP SUMMARY:
+ in use at exit: 0 bytes in 0 blocks
+ total heap usage: 0 allocs, 0 frees, 0 bytes allocated
+
+For a detailed leak analysis, rerun with: --leak-check=full
+
+Use --track-origins=yes to see where uninitialised values come from
+For lists of detected and suppressed errors, rerun with: -s
+ERROR SUMMARY: 4 errors from 4 contexts (suppressed: 0 from 0)
diff --git a/memcheck/tests/amd64/pcmpgtd.vgtest b/memcheck/tests/amd64/pcmpgtd.vgtest
new file mode 100644
index 000000000..08fabeb76
--- /dev/null
+++ b/memcheck/tests/amd64/pcmpgtd.vgtest
@@ -0,0 +1,2 @@
+prog: pcmpgtd -q
+prereq: test -e pcmpgtd
--
2.20.1
|