You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
1
(5) |
2
(10) |
3
(9) |
4
(8) |
5
(2) |
6
|
|
7
|
8
(1) |
9
(4) |
10
(2) |
11
|
12
(1) |
13
(2) |
|
14
|
15
(7) |
16
(1) |
17
(9) |
18
(1) |
19
(4) |
20
(4) |
|
21
(1) |
22
(3) |
23
(1) |
24
|
25
|
26
|
27
|
|
28
|
29
(2) |
30
(2) |
31
(6) |
|
|
|
|
From: Eyal S. <eya...@gm...> - 2021-03-04 22:08:20
|
From: eyal0 <109...@us...> This fixes https://bugs.kde.org/show_bug.cgi?id=432801 To test: ```sh make && perl tests/vg_regtest memcheck/tests/x86/pcmpgtd ``` --- .gitignore | 1 + memcheck/mc_translate.c | 101 +++++++++++++++++- memcheck/tests/amd64/Makefile.am | 6 +- memcheck/tests/amd64/pcmpgtd.c | 134 ++++++++++++++++++++++++ memcheck/tests/amd64/pcmpgtd.stderr.exp | 44 ++++++++ memcheck/tests/amd64/pcmpgtd.vgtest | 2 + 6 files changed, 285 insertions(+), 3 deletions(-) create mode 100644 memcheck/tests/amd64/pcmpgtd.c create mode 100644 memcheck/tests/amd64/pcmpgtd.stderr.exp create mode 100644 memcheck/tests/amd64/pcmpgtd.vgtest diff --git a/.gitignore b/.gitignore index b9fca3de3..fd1cf9ae5 100644 --- a/.gitignore +++ b/.gitignore @@ -974,6 +974,7 @@ /memcheck/tests/amd64/insn-bsfl /memcheck/tests/amd64/insn-pmovmskb /memcheck/tests/amd64/insn-pcmpistri +/memcheck/tests/amd64/pcmpgtd /memcheck/tests/amd64/sh-mem-vec128 /memcheck/tests/amd64/sh-mem-vec256 /memcheck/tests/amd64/xsave-avx diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c index 516988bdd..51e94427e 100644 --- a/memcheck/mc_translate.c +++ b/memcheck/mc_translate.c @@ -1287,6 +1287,101 @@ static IRAtom* expensiveCmpEQorNE ( MCEnv* mce, return final_cast; } +/* Check if we can know, despite the uncertain bits, that xx is greater than yy. + Notice that it's xx > yy and not the other way around. This is Intel syntax + with destination first. It will appear reversed in gdb disassembly (AT&T + syntax). 
+ */ +static IRAtom* expensiveCmpGT ( MCEnv* mce, + unsigned int word_size, + Bool is_signed, + unsigned int count, + IRAtom* vxx, IRAtom* vyy, + IRAtom* xx, IRAtom* yy ) +{ + IROp opAND, opOR, opXOR, opNOT, opEQ, opSHL, opGT; + IRType ty; + + tl_assert(isShadowAtom(mce,vxx)); + tl_assert(isShadowAtom(mce,vyy)); + tl_assert(isOriginalAtom(mce,xx)); + tl_assert(isOriginalAtom(mce,yy)); + tl_assert(sameKindedAtoms(vxx,xx)); + tl_assert(sameKindedAtoms(vyy,yy)); + + switch (word_size * count) { + case 128: + ty = Ity_V128; + opAND = Iop_AndV128; + opOR = Iop_OrV128; + opXOR = Iop_XorV128; + opNOT = Iop_NotV128; + break; + default: + VG_(tool_panic)("expensiveCmpGT"); + } + if (word_size == 32 && count == 4) { + opEQ = Iop_CmpEQ32x4; + opSHL = Iop_ShlN32x4; + if (is_signed) { + opGT = Iop_CmpGT32Sx4; + } else { + opGT = Iop_CmpGT32Ux4; + } + } else { + VG_(tool_panic)("expensiveCmpGT"); + } + IRAtom *MSBs; + if (is_signed) { + // For unsigned it's easy to make the min and max: Just set the unknown + // bits all to 0s or 1s. For signed it's harder because having a 1 in the + // MSB makes a number smaller, not larger! We can work around this by + // flipping the MSB before and after computing the min and max values. + IRAtom *const0 = mkV128(0); + IRAtom *all_ones = assignNew('V', mce, ty, binop(opEQ, const0, const0)); + MSBs = assignNew('V', mce, ty, binop(opSHL, all_ones, mkU8(31))); + xx = assignNew('V', mce, ty, binop(opXOR, xx, MSBs)); + yy = assignNew('V', mce, ty, binop(opXOR, yy, MSBs)); + // From here on out, we're dealing with MSB-flipped integers. + } + // We can combine xx and vxx to create two values: the largest that xx could + // possibly be and the smallest that xx could possibly be. Likewise, we can + // do the same for yy. We'll call those max_xx and min_xx and max_yy and + // min_yy. 
+ IRAtom *not_vxx = assignNew('V', mce, ty, unop(opNOT, vxx)); + IRAtom *not_vyy = assignNew('V', mce, ty, unop(opNOT, vyy)); + IRAtom *max_xx = assignNew('V', mce, ty, binop(opOR, xx, vxx)); + IRAtom *min_xx = assignNew('V', mce, ty, binop(opAND, xx, not_vxx)); + IRAtom *max_yy = assignNew('V', mce, ty, binop(opOR, yy, vyy)); + IRAtom *min_yy = assignNew('V', mce, ty, binop(opAND, yy, not_vyy)); + if (is_signed) { + // Unflip the MSBs. + max_xx = assignNew('V', mce, ty, binop(opXOR, max_xx, MSBs)); + min_xx = assignNew('V', mce, ty, binop(opXOR, min_xx, MSBs)); + max_yy = assignNew('V', mce, ty, binop(opXOR, max_yy, MSBs)); + min_yy = assignNew('V', mce, ty, binop(opXOR, min_yy, MSBs)); + } + IRAtom *min_xx_gt_max_yy = assignNew('V', mce, ty, binop(opGT, min_xx, max_yy)); + IRAtom *max_xx_gt_min_yy = assignNew('V', mce, ty, binop(opGT, max_xx, min_yy)); + // If min_xx is greater than max_yy then xx is surely greater than yy so we know + // our answer for sure. If max_xx is not greater than min_yy then xx can't + // possible be greater than yy so again we know the answer for sure. For all + // other cases, we can't know. + // + // So the result is defined if: + // + // min_xx_gt_max_yy | ~max_xx_gt_min_yy + // + // Because defined in vbits is 0s and not 1s, we need to invert that: + // + // ~(min_xx_gt_max_yy | ~max_xx_gt_min_yy) + // + // We can use DeMorgan's Law to simplify the above: + // + // ~min_xx_gt_max_yy & max_xx_gt_min_yy + IRAtom *not_min_xx_gt_max_yy = assignNew('V', mce, ty, unop(opNOT, min_xx_gt_max_yy)); + return assignNew('V', mce, ty, binop(opAND, not_min_xx_gt_max_yy, max_xx_gt_min_yy)); +} /* --------- Semi-accurate interpretation of CmpORD. 
--------- */ @@ -3947,9 +4042,13 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_PwExtUSMulQAdd8x16: return binary16Ix8(mce, vatom1, vatom2); - case Iop_Sub32x4: case Iop_CmpGT32Sx4: + return expensiveCmpGT(mce, 32, True /* signed */, + 4, vatom1, vatom2, atom1, atom2); case Iop_CmpGT32Ux4: + return expensiveCmpGT(mce, 32, False /* unsigned */, + 4, vatom1, vatom2, atom1, atom2); + case Iop_Sub32x4: case Iop_CmpEQ32x4: case Iop_QAdd32Sx4: case Iop_QAdd32Ux4: diff --git a/memcheck/tests/amd64/Makefile.am b/memcheck/tests/amd64/Makefile.am index da15cf797..e81ea74da 100644 --- a/memcheck/tests/amd64/Makefile.am +++ b/memcheck/tests/amd64/Makefile.am @@ -18,6 +18,7 @@ EXTRA_DIST = \ insn-pcmpistri.vgtest insn-pcmpistri.stdout.exp insn-pcmpistri.stderr.exp \ insn-pmovmskb.vgtest insn-pmovmskb.stdout.exp insn-pmovmskb.stderr.exp \ more_x87_fp.stderr.exp more_x87_fp.stdout.exp more_x87_fp.vgtest \ + pcmpgtd.stderr.exp pcmpgtd.vgtest \ sh-mem-vec128-plo-no.vgtest \ sh-mem-vec128-plo-no.stderr.exp \ sh-mem-vec128-plo-no.stdout.exp \ @@ -43,6 +44,7 @@ check_PROGRAMS = \ fxsave-amd64 \ insn-bsfl \ insn-pmovmskb \ + pcmpgtd \ sh-mem-vec128 \ sse_memory \ xor-undef-amd64 @@ -55,8 +57,8 @@ endif # clang 3.5.0 barfs about -mfancy-math-387 if !COMPILER_IS_CLANG check_PROGRAMS += \ - more_x87_fp \ - shr_edx + more_x87_fp \ + shr_edx endif AM_CFLAGS += @FLAG_M64@ diff --git a/memcheck/tests/amd64/pcmpgtd.c b/memcheck/tests/amd64/pcmpgtd.c new file mode 100644 index 000000000..891ebad35 --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.c @@ -0,0 +1,134 @@ +/* https://bugs.kde.org/show_bug.cgi?id=432801 */ + +#include <signal.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <ctype.h> + +#include "../../memcheck.h" + +// This function fails when compiled on clang version 10 or greater with -O2. 
+// It's unused by the test but left here as a copy of the error in the bug +// report https://bugs.kde.org/show_bug.cgi?id=432801 +void standalone() { + struct sigaction act; + if (sigaction(SIGTERM, 0, &act) == 1) { + return; + } + if (sigaction(SIGTERM, 0, &act) == 1) { + return; + } + + char pattern[] = "\x1\x2\x3\x4\x5\x6\x7\x8\x9"; + const unsigned long plen = strlen(pattern); + pattern[1] = 0; + size_t hp=0; + for (size_t i = 0; i < plen; ++i) + hp += pattern[i]; + volatile int j = 0; + if (j == hp % 10) { + j++; + } + printf("%ld\n", hp); +} + +typedef unsigned long ULong; + +typedef struct { + ULong w64[2]; /* Note: little-endian */ +} V128; + +static int cmpGT32Sx4(V128 x, V128 y) +{ + int result; + __asm__("movups %1,%%xmm6\n" + "\tmovups %2,%%xmm7\n" + // order swapped for AT&T style which has destination second. + "\tpcmpgtd %%xmm7,%%xmm6\n" + "\tmovd %%xmm6, %0" + : "=r" (result) : "m" (x), "m" (y) : "xmm6"); + return result; +} + +/* Set the V bits on the data at "addr". Note the convention: A zero + bit means "defined"; 1 means "undefined". */ +static void set_vbits(V128 *addr, V128 vbits) +{ + int i; + for (i=0 ; i<2 ; ++i) { + (void)VALGRIND_SET_VBITS(&addr->w64[i], &vbits.w64[i], sizeof(vbits.w64[i])); + } +} + +/* Use a value that we know is invalid. */ +static void use(char *x, char* y, int invalid) +{ + /* Convince GCC it does not know what is in "invalid" so it cannot + possibly optimize away the conditional branch below. */ + __asm__ ("" : "=r" (invalid) : "0" (invalid)); + + /* Create a conditional branch on which our output depends, so that + memcheck cannot possibly optimize it away, either. */ + if (invalid) { + fprintf(stderr, "%s > %s == true\n", x, y); + } else { + fprintf(stderr, "%s > %s == false\n", x, y); + } +} + +// Convert a string like "123XXX45" to a value and vbits. 
+V128 string_to_v128(char *s) { + ULong x = 0; + ULong vx = 0; + + for (; *s; s++) { + int lowered_c = tolower(*s); + x <<= 4; + vx <<= 4; + if (lowered_c == 'x') { + vx |= 0xf; + } else if (isdigit(lowered_c)) { + x |= lowered_c - '0'; + } else if (lowered_c >= 'a' && lowered_c <= 'f') { + x |= lowered_c - 'a' + 0xa; + } else { + fprintf(stderr, "Not a hex digit: %c\n", *s); + exit(1); + } + } + + V128 vx128 = { { vx, 0 } }; + V128 x128 = { { x, 0 } }; + set_vbits(&x128, vx128); + return x128; +} + +static void doit(char *x, char *y) { + int result = cmpGT32Sx4(string_to_v128(x), string_to_v128(y)); + use(x, y, result); +} + +int main() { + // completely undefined + doit("xxxxxxxx", "xxxxxxxx"); + + // completely defined + doit("00000000", "00000000"); + doit("00000000", "f0000000"); + doit("f0000000", "00000000"); + + doit("00000000", "fxxxxxxx"); // defined: 0 > all negatives + doit("0xxxxxxx", "fxxxxxxx"); // defined: non-negatives > all negatives + doit("xxxxxxx0", "f0000000"); // undefined + doit("xxxxxxx1", "80000000"); // defined: ends with 1 > MIN_INT + doit("5xxxxxxx", "6xxxxxxx"); // defined + doit("8xxxxxxx", "9xxxxxxx"); // defined + + doit("1234567x", "12345678"); // undefined + doit("1234567x", "1234567f"); // defined: x can't be more than f + doit("1234567x", "1234567e"); // undefined: x can be more than e + + return 0; +} diff --git a/memcheck/tests/amd64/pcmpgtd.stderr.exp b/memcheck/tests/amd64/pcmpgtd.stderr.exp new file mode 100644 index 000000000..bb248c60c --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.stderr.exp @@ -0,0 +1,44 @@ + +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:115) + +xxxxxxxx > xxxxxxxx == false +00000000 > 00000000 == false +00000000 > f0000000 == true +f0000000 > 00000000 == false +00000000 > fxxxxxxx == true +0xxxxxxx > fxxxxxxx == true +Conditional jump or move depends on uninitialised value(s) + at 
0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:124) + +xxxxxxx0 > f0000000 == true +xxxxxxx1 > 80000000 == true +5xxxxxxx > 6xxxxxxx == false +8xxxxxxx > 9xxxxxxx == false +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:129) + +1234567x > 12345678 == false +1234567x > 1234567f == false +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:131) + +1234567x > 1234567e == false + +HEAP SUMMARY: + in use at exit: 0 bytes in 0 blocks + total heap usage: 0 allocs, 0 frees, 0 bytes allocated + +For a detailed leak analysis, rerun with: --leak-check=full + +Use --track-origins=yes to see where uninitialised values come from +For lists of detected and suppressed errors, rerun with: -s +ERROR SUMMARY: 4 errors from 4 contexts (suppressed: 0 from 0) diff --git a/memcheck/tests/amd64/pcmpgtd.vgtest b/memcheck/tests/amd64/pcmpgtd.vgtest new file mode 100644 index 000000000..08fabeb76 --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.vgtest @@ -0,0 +1,2 @@ +prog: pcmpgtd -q +prereq: test -e pcmpgtd -- 2.20.1 |
|
From: Eyal S. <eya...@gm...> - 2021-03-04 21:58:00
|
Incorporating suggestions from Julian: https://bugs.kde.org/show_bug.cgi?id=432801#c17 |
|
From: Eyal S. <eya...@gm...> - 2021-03-04 21:58:00
|
From: eyal0 <109...@us...> This fixes https://bugs.kde.org/show_bug.cgi?id=432801 To test: ```sh make && perl tests/vg_regtest memcheck/tests/x86/pcmpgtd ``` --- .gitignore | 1 + memcheck/mc_translate.c | 101 +++++++++++++++++- memcheck/tests/amd64/Makefile.am | 6 +- memcheck/tests/amd64/pcmpgtd.c | 134 ++++++++++++++++++++++++ memcheck/tests/amd64/pcmpgtd.stderr.exp | 44 ++++++++ memcheck/tests/amd64/pcmpgtd.vgtest | 2 + 6 files changed, 285 insertions(+), 3 deletions(-) create mode 100644 memcheck/tests/amd64/pcmpgtd.c create mode 100644 memcheck/tests/amd64/pcmpgtd.stderr.exp create mode 100644 memcheck/tests/amd64/pcmpgtd.vgtest diff --git a/.gitignore b/.gitignore index b9fca3de3..fd1cf9ae5 100644 --- a/.gitignore +++ b/.gitignore @@ -974,6 +974,7 @@ /memcheck/tests/amd64/insn-bsfl /memcheck/tests/amd64/insn-pmovmskb /memcheck/tests/amd64/insn-pcmpistri +/memcheck/tests/amd64/pcmpgtd /memcheck/tests/amd64/sh-mem-vec128 /memcheck/tests/amd64/sh-mem-vec256 /memcheck/tests/amd64/xsave-avx diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c index 516988bdd..51e94427e 100644 --- a/memcheck/mc_translate.c +++ b/memcheck/mc_translate.c @@ -1287,6 +1287,101 @@ static IRAtom* expensiveCmpEQorNE ( MCEnv* mce, return final_cast; } +/* Check if we can know, despite the uncertain bits, that xx is greater than yy. + Notice that it's xx > yy and not the other way around. This is Intel syntax + with destination first. It will appear reversed in gdb disassembly (AT&T + syntax). 
+ */ +static IRAtom* expensiveCmpGT ( MCEnv* mce, + unsigned int word_size, + Bool is_signed, + unsigned int count, + IRAtom* vxx, IRAtom* vyy, + IRAtom* xx, IRAtom* yy ) +{ + IROp opAND, opOR, opXOR, opNOT, opEQ, opSHL, opGT; + IRType ty; + + tl_assert(isShadowAtom(mce,vxx)); + tl_assert(isShadowAtom(mce,vyy)); + tl_assert(isOriginalAtom(mce,xx)); + tl_assert(isOriginalAtom(mce,yy)); + tl_assert(sameKindedAtoms(vxx,xx)); + tl_assert(sameKindedAtoms(vyy,yy)); + + switch (word_size * count) { + case 128: + ty = Ity_V128; + opAND = Iop_AndV128; + opOR = Iop_OrV128; + opXOR = Iop_XorV128; + opNOT = Iop_NotV128; + break; + default: + VG_(tool_panic)("expensiveCmpGT"); + } + if (word_size == 32 && count == 4) { + opEQ = Iop_CmpEQ32x4; + opSHL = Iop_ShlN32x4; + if (is_signed) { + opGT = Iop_CmpGT32Sx4; + } else { + opGT = Iop_CmpGT32Ux4; + } + } else { + VG_(tool_panic)("expensiveCmpGT"); + } + IRAtom *MSBs; + if (is_signed) { + // For unsigned it's easy to make the min and max: Just set the unknown + // bits all to 0s or 1s. For signed it's harder because having a 1 in the + // MSB makes a number smaller, not larger! We can work around this by + // flipping the MSB before and after computing the min and max values. + IRAtom *const0 = mkV128(0); + IRAtom *all_ones = assignNew('V', mce, ty, binop(opEQ, const0, const0)); + MSBs = assignNew('V', mce, ty, binop(opSHL, all_ones, mkU8(31))); + xx = assignNew('V', mce, ty, binop(opXOR, xx, MSBs)); + yy = assignNew('V', mce, ty, binop(opXOR, yy, MSBs)); + // From here on out, we're dealing with MSB-flipped integers. + } + // We can combine xx and vxx to create two values: the largest that xx could + // possibly be and the smallest that xx could possibly be. Likewise, we can + // do the same for yy. We'll call those max_xx and min_xx and max_yy and + // min_yy. 
+ IRAtom *not_vxx = assignNew('V', mce, ty, unop(opNOT, vxx)); + IRAtom *not_vyy = assignNew('V', mce, ty, unop(opNOT, vyy)); + IRAtom *max_xx = assignNew('V', mce, ty, binop(opOR, xx, vxx)); + IRAtom *min_xx = assignNew('V', mce, ty, binop(opAND, xx, not_vxx)); + IRAtom *max_yy = assignNew('V', mce, ty, binop(opOR, yy, vyy)); + IRAtom *min_yy = assignNew('V', mce, ty, binop(opAND, yy, not_vyy)); + if (is_signed) { + // Unflip the MSBs. + max_xx = assignNew('V', mce, ty, binop(opXOR, max_xx, MSBs)); + min_xx = assignNew('V', mce, ty, binop(opXOR, min_xx, MSBs)); + max_yy = assignNew('V', mce, ty, binop(opXOR, max_yy, MSBs)); + min_yy = assignNew('V', mce, ty, binop(opXOR, min_yy, MSBs)); + } + IRAtom *min_xx_gt_max_yy = assignNew('V', mce, ty, binop(opGT, min_xx, max_yy)); + IRAtom *max_xx_gt_min_yy = assignNew('V', mce, ty, binop(opGT, max_xx, min_yy)); + // If min_xx is greater than max_yy then xx is surely greater than yy so we know + // our answer for sure. If max_xx is not greater than min_yy then xx can't + // possible be greater than yy so again we know the answer for sure. For all + // other cases, we can't know. + // + // So the result is defined if: + // + // min_xx_gt_max_yy | ~max_xx_gt_min_yy + // + // Because defined in vbits is 0s and not 1s, we need to invert that: + // + // ~(min_xx_gt_max_yy | ~max_xx_gt_min_yy) + // + // We can use DeMorgan's Law to simplify the above: + // + // ~min_xx_gt_max_yy & max_xx_gt_min_yy + IRAtom *not_min_xx_gt_max_yy = assignNew('V', mce, ty, unop(opNOT, min_xx_gt_max_yy)); + return assignNew('V', mce, ty, binop(opAND, not_min_xx_gt_max_yy, max_xx_gt_min_yy)); +} /* --------- Semi-accurate interpretation of CmpORD. 
--------- */ @@ -3947,9 +4042,13 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_PwExtUSMulQAdd8x16: return binary16Ix8(mce, vatom1, vatom2); - case Iop_Sub32x4: case Iop_CmpGT32Sx4: + return expensiveCmpGT(mce, 32, True /* signed */, + 4, vatom1, vatom2, atom1, atom2); case Iop_CmpGT32Ux4: + return expensiveCmpGT(mce, 32, False /* unsigned */, + 4, vatom1, vatom2, atom1, atom2); + case Iop_Sub32x4: case Iop_CmpEQ32x4: case Iop_QAdd32Sx4: case Iop_QAdd32Ux4: diff --git a/memcheck/tests/amd64/Makefile.am b/memcheck/tests/amd64/Makefile.am index da15cf797..e81ea74da 100644 --- a/memcheck/tests/amd64/Makefile.am +++ b/memcheck/tests/amd64/Makefile.am @@ -18,6 +18,7 @@ EXTRA_DIST = \ insn-pcmpistri.vgtest insn-pcmpistri.stdout.exp insn-pcmpistri.stderr.exp \ insn-pmovmskb.vgtest insn-pmovmskb.stdout.exp insn-pmovmskb.stderr.exp \ more_x87_fp.stderr.exp more_x87_fp.stdout.exp more_x87_fp.vgtest \ + pcmpgtd.stderr.exp pcmpgtd.vgtest \ sh-mem-vec128-plo-no.vgtest \ sh-mem-vec128-plo-no.stderr.exp \ sh-mem-vec128-plo-no.stdout.exp \ @@ -43,6 +44,7 @@ check_PROGRAMS = \ fxsave-amd64 \ insn-bsfl \ insn-pmovmskb \ + pcmpgtd \ sh-mem-vec128 \ sse_memory \ xor-undef-amd64 @@ -55,8 +57,8 @@ endif # clang 3.5.0 barfs about -mfancy-math-387 if !COMPILER_IS_CLANG check_PROGRAMS += \ - more_x87_fp \ - shr_edx + more_x87_fp \ + shr_edx endif AM_CFLAGS += @FLAG_M64@ diff --git a/memcheck/tests/amd64/pcmpgtd.c b/memcheck/tests/amd64/pcmpgtd.c new file mode 100644 index 000000000..891ebad35 --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.c @@ -0,0 +1,134 @@ +/* https://bugs.kde.org/show_bug.cgi?id=432801 */ + +#include <signal.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <ctype.h> + +#include "../../memcheck.h" + +// This function fails when compiled on clang version 10 or greater with -O2. 
+// It's unused by the test but left here as a copy of the error in the bug +// report https://bugs.kde.org/show_bug.cgi?id=432801 +void standalone() { + struct sigaction act; + if (sigaction(SIGTERM, 0, &act) == 1) { + return; + } + if (sigaction(SIGTERM, 0, &act) == 1) { + return; + } + + char pattern[] = "\x1\x2\x3\x4\x5\x6\x7\x8\x9"; + const unsigned long plen = strlen(pattern); + pattern[1] = 0; + size_t hp=0; + for (size_t i = 0; i < plen; ++i) + hp += pattern[i]; + volatile int j = 0; + if (j == hp % 10) { + j++; + } + printf("%ld\n", hp); +} + +typedef unsigned long ULong; + +typedef struct { + ULong w64[2]; /* Note: little-endian */ +} V128; + +static int cmpGT32Sx4(V128 x, V128 y) +{ + int result; + __asm__("movups %1,%%xmm6\n" + "\tmovups %2,%%xmm7\n" + // order swapped for AT&T style which has destination second. + "\tpcmpgtd %%xmm7,%%xmm6\n" + "\tmovd %%xmm6, %0" + : "=r" (result) : "m" (x), "m" (y) : "xmm6"); + return result; +} + +/* Set the V bits on the data at "addr". Note the convention: A zero + bit means "defined"; 1 means "undefined". */ +static void set_vbits(V128 *addr, V128 vbits) +{ + int i; + for (i=0 ; i<2 ; ++i) { + (void)VALGRIND_SET_VBITS(&addr->w64[i], &vbits.w64[i], sizeof(vbits.w64[i])); + } +} + +/* Use a value that we know is invalid. */ +static void use(char *x, char* y, int invalid) +{ + /* Convince GCC it does not know what is in "invalid" so it cannot + possibly optimize away the conditional branch below. */ + __asm__ ("" : "=r" (invalid) : "0" (invalid)); + + /* Create a conditional branch on which our output depends, so that + memcheck cannot possibly optimize it away, either. */ + if (invalid) { + fprintf(stderr, "%s > %s == true\n", x, y); + } else { + fprintf(stderr, "%s > %s == false\n", x, y); + } +} + +// Convert a string like "123XXX45" to a value and vbits. 
+V128 string_to_v128(char *s) { + ULong x = 0; + ULong vx = 0; + + for (; *s; s++) { + int lowered_c = tolower(*s); + x <<= 4; + vx <<= 4; + if (lowered_c == 'x') { + vx |= 0xf; + } else if (isdigit(lowered_c)) { + x |= lowered_c - '0'; + } else if (lowered_c >= 'a' && lowered_c <= 'f') { + x |= lowered_c - 'a' + 0xa; + } else { + fprintf(stderr, "Not a hex digit: %c\n", *s); + exit(1); + } + } + + V128 vx128 = { { vx, 0 } }; + V128 x128 = { { x, 0 } }; + set_vbits(&x128, vx128); + return x128; +} + +static void doit(char *x, char *y) { + int result = cmpGT32Sx4(string_to_v128(x), string_to_v128(y)); + use(x, y, result); +} + +int main() { + // completely undefined + doit("xxxxxxxx", "xxxxxxxx"); + + // completely defined + doit("00000000", "00000000"); + doit("00000000", "f0000000"); + doit("f0000000", "00000000"); + + doit("00000000", "fxxxxxxx"); // defined: 0 > all negatives + doit("0xxxxxxx", "fxxxxxxx"); // defined: non-negatives > all negatives + doit("xxxxxxx0", "f0000000"); // undefined + doit("xxxxxxx1", "80000000"); // defined: ends with 1 > MIN_INT + doit("5xxxxxxx", "6xxxxxxx"); // defined + doit("8xxxxxxx", "9xxxxxxx"); // defined + + doit("1234567x", "12345678"); // undefined + doit("1234567x", "1234567f"); // defined: x can't be more than f + doit("1234567x", "1234567e"); // undefined: x can be more than e + + return 0; +} diff --git a/memcheck/tests/amd64/pcmpgtd.stderr.exp b/memcheck/tests/amd64/pcmpgtd.stderr.exp new file mode 100644 index 000000000..bb248c60c --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.stderr.exp @@ -0,0 +1,44 @@ + +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:115) + +xxxxxxxx > xxxxxxxx == false +00000000 > 00000000 == false +00000000 > f0000000 == true +f0000000 > 00000000 == false +00000000 > fxxxxxxx == true +0xxxxxxx > fxxxxxxx == true +Conditional jump or move depends on uninitialised value(s) + at 
0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:124) + +xxxxxxx0 > f0000000 == true +xxxxxxx1 > 80000000 == true +5xxxxxxx > 6xxxxxxx == false +8xxxxxxx > 9xxxxxxx == false +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:129) + +1234567x > 12345678 == false +1234567x > 1234567f == false +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:131) + +1234567x > 1234567e == false + +HEAP SUMMARY: + in use at exit: 0 bytes in 0 blocks + total heap usage: 0 allocs, 0 frees, 0 bytes allocated + +For a detailed leak analysis, rerun with: --leak-check=full + +Use --track-origins=yes to see where uninitialised values come from +For lists of detected and suppressed errors, rerun with: -s +ERROR SUMMARY: 4 errors from 4 contexts (suppressed: 0 from 0) diff --git a/memcheck/tests/amd64/pcmpgtd.vgtest b/memcheck/tests/amd64/pcmpgtd.vgtest new file mode 100644 index 000000000..08fabeb76 --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.vgtest @@ -0,0 +1,2 @@ +prog: pcmpgtd -q +prereq: test -e pcmpgtd -- 2.20.1 |
|
From: Carl L. <ca...@so...> - 2021-03-04 19:29:58
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=3c31707cd3d795bcdd3afc540b5aad75d1e50008 commit 3c31707cd3d795bcdd3afc540b5aad75d1e50008 Author: Carl Love <ce...@us...> Date: Mon Nov 16 19:53:22 2020 -0600 VSX Mask Manipulation operation tests. Diff: --- NEWS | 11 ++ none/tests/ppc64/test_isa_3_1_RT.c | 52 ++++++++ none/tests/ppc64/test_isa_3_1_RT.stdout.exp | 184 ++++++++++++++++++++++++++- none/tests/ppc64/test_isa_3_1_VRT.c | 52 ++++++++ none/tests/ppc64/test_isa_3_1_VRT.stdout.exp | 108 +++++++++++++++- 5 files changed, 405 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 6cb270c1c4..c8fe48f709 100644 --- a/NEWS +++ b/NEWS @@ -35,6 +35,16 @@ support for X86/macOS 10.13, AMD64/macOS 10.13 and nanoMIPS/Linux. Valgrind. The vector-packed-decimal facility is currently not exploited by the standard toolchain and libraries. +* ppc64: + + - Various bug fixes. Fix for the sync field to limit setting just two + of the two bits in the L-field. Fix the write size for the stxsibx and + stxsihx instructions. Fix the modsw and modsd instructions. + + - Partial support for ISA 3.1 has been added. Support for the VSX PCV + mask instructions, bfloat16 GER instructions, and bfloat16 to/from float + 32-bit conversion instructions are still missing. + * Valgrind now supports debuginfod, an HTTP server for distributing ELF/DWARF debugging information. When a debuginfo file cannot be found locally, Valgrind is able to query debuginfod servers for the @@ -134,6 +144,7 @@ where XXXXXX is the bug number as listed below. 
428716 cppcheck detects potential leak in VEX/useful/smchash.c 428909 helgrind: need to intercept duplicate libc definitions for Fedora 33 429352 PPC ISA 3.1 support is missing, part 7 +429354 PPC ISA 3.1 support is missing, part 8 429692 unhandled ppc64le-linux syscall: 147 (getsid) 429864 s390x: C++ atomic test_and_set yields false-positive memcheck diagnostics diff --git a/none/tests/ppc64/test_isa_3_1_RT.c b/none/tests/ppc64/test_isa_3_1_RT.c index ce70c7ac0f..9e2e352f89 100644 --- a/none/tests/ppc64/test_isa_3_1_RT.c +++ b/none/tests/ppc64/test_isa_3_1_RT.c @@ -689,6 +689,45 @@ static void test_pstq_off32 (void) { static void test_pstq_off64 (void) { __asm__ __volatile__ ("pstq 24, 64(%0), 0" :: "r" (ra) ); } +static void test_vcntmbb_0 (void) { + __asm__ __volatile__ ("vcntmbb %0, %1, 0" : "=r" (rt) : "v" (vrb) ); +} +static void test_vcntmbb_1 (void) { + __asm__ __volatile__ ("vcntmbb %0, %1, 1" : "=r" (rt) : "v" (vrb) ); +} +static void test_vcntmbh_0 (void) { + __asm__ __volatile__ ("vcntmbh %0, %1, 0" : "=r" (rt) : "v" (vrb) ); +} +static void test_vcntmbh_1 (void) { + __asm__ __volatile__ ("vcntmbh %0, %1, 1" : "=r" (rt) : "v" (vrb) ); +} +static void test_vcntmbw_0 (void) { + __asm__ __volatile__ ("vcntmbw %0, %1, 0" : "=r" (rt) : "v" (vrb) ); +} +static void test_vcntmbw_1 (void) { + __asm__ __volatile__ ("vcntmbw %0, %1, 1" : "=r" (rt) : "v" (vrb) ); +} +static void test_vcntmbd_0 (void) { + __asm__ __volatile__ ("vcntmbd %0, %1, 0" : "=r" (rt) : "v" (vrb) ); +} +static void test_vcntmbd_1 (void) { + __asm__ __volatile__ ("vcntmbd %0, %1, 1" : "=r" (rt) : "v" (vrb) ); +} +static void test_vextractbm (void) { + __asm__ __volatile__ ("vextractbm %0, %1 " : "=r" (rt) : "v" (vrb) ); +} +static void test_vextracthm (void) { + __asm__ __volatile__ ("vextracthm %0, %1 " : "=r" (rt) : "v" (vrb) ); +} +static void test_vextractwm (void) { + __asm__ __volatile__ ("vextractwm %0, %1 " : "=r" (rt) : "v" (vrb) ); +} +static void test_vextractdm (void) { + __asm__ 
__volatile__ ("vextractdm %0, %1 " : "=r" (rt) : "v" (vrb) ); +} +static void test_vextractqm (void) { + __asm__ __volatile__ ("vextractqm %0, %1 " : "=r" (rt) : "v" (vrb) ); +} static test_list_t testgroup_generic[] = { { &test_brd, "brd", "RA,RS"}, /* bcs */ @@ -845,6 +884,19 @@ static test_list_t testgroup_generic[] = { { &test_setnbc_31_cr1s, "setnbc 31_cr1s", "RT,BI"}, /* bcwp */ { &test_setnbc_31_creb, "setnbc 31_creb", "RT,BI"}, /* bcwp */ { &test_setnbc_31_crob, "setnbc 31_crob", "RT,BI"}, /* bcwp */ + { &test_vcntmbb_0, "vcntmbb 0", "RT,VRB,MP"}, /* bcwp */ + { &test_vcntmbb_1, "vcntmbb 1", "RT,VRB,MP"}, /* bcwp */ + { &test_vcntmbd_0, "vcntmbd 0", "RT,VRB,MP"}, /* bcwp */ + { &test_vcntmbd_1, "vcntmbd 1", "RT,VRB,MP"}, /* bcwp */ + { &test_vcntmbh_0, "vcntmbh 0", "RT,VRB,MP"}, /* bcwp */ + { &test_vcntmbh_1, "vcntmbh 1", "RT,VRB,MP"}, /* bcwp */ + { &test_vcntmbw_0, "vcntmbw 0", "RT,VRB,MP"}, /* bcwp */ + { &test_vcntmbw_1, "vcntmbw 1", "RT,VRB,MP"}, /* bcwp */ + { &test_vextractbm, "vextractbm", "RT,VRB"}, /* bcs */ + { &test_vextractdm, "vextractdm", "RT,VRB"}, /* bcs */ + { &test_vextracthm, "vextracthm", "RT,VRB"}, /* bcs */ + { &test_vextractqm, "vextractqm", "RT,VRB"}, /* bcs */ + { &test_vextractwm, "vextractwm", "RT,VRB"}, /* bcs */ { &test_vgnb_2, "vgnb 2", "RT,VRB,N"}, /* bcwp */ { &test_vgnb_3, "vgnb 3", "RT,VRB,N"}, /* bcwp */ { &test_vgnb_4, "vgnb 4", "RT,VRB,N"}, /* bcwp */ diff --git a/none/tests/ppc64/test_isa_3_1_RT.stdout.exp b/none/tests/ppc64/test_isa_3_1_RT.stdout.exp index 7cacb36440..132ae31196 100644 --- a/none/tests/ppc64/test_isa_3_1_RT.stdout.exp +++ b/none/tests/ppc64/test_isa_3_1_RT.stdout.exp @@ -521,6 +521,188 @@ setnbc 31_creb => [aaaaaaaa] 0 setnbc 31_crob => [55555555] ffffffffffffffff +vcntmbb 0 7f800000ff800000,ff8000007f800000 => a00000000000000 +vcntmbb 0 ff8000007f800000,ff7ffffe7f7ffffe => 800000000000000 +vcntmbb 0 ff7ffffe7f7ffffe,0080000e8080000e => 800000000000000 +vcntmbb 0 0080000e8080000e,0180055e0180077e => 
b00000000000000 +vcntmbb 0 0180055e0180077e,0000111e8000222e => d00000000000000 +vcntmbb 0 0000111e8000222e,7ff0000000000000 => e00000000000000 +vcntmbb 0 7ff0000000000000,fff0000000000000 => d00000000000000 +vcntmbb 0 fff0000000000000,2208400000000000 => e00000000000000 +vcntmbb 0 2208400000000000,0000000000000009 => 1000000000000000 +vcntmbb 0 0000000000000009,ffff000180000001 => d00000000000000 +vcntmbb 0 ffff000180000001,0000000000000000 => d00000000000000 +vcntmbb 0 0000000000000000,8000000000000000 => f00000000000000 +vcntmbb 0 8000000000000000,7f800000ff800000 => c00000000000000 + +vcntmbb 1 7f800000ff800000,ff8000007f800000 => 600000000000000 +vcntmbb 1 ff8000007f800000,ff7ffffe7f7ffffe => 800000000000000 +vcntmbb 1 ff7ffffe7f7ffffe,0080000e8080000e => 800000000000000 +vcntmbb 1 0080000e8080000e,0180055e0180077e => 500000000000000 +vcntmbb 1 0180055e0180077e,0000111e8000222e => 300000000000000 +vcntmbb 1 0000111e8000222e,7ff0000000000000 => 200000000000000 +vcntmbb 1 7ff0000000000000,fff0000000000000 => 300000000000000 +vcntmbb 1 fff0000000000000,2208400000000000 => 200000000000000 +vcntmbb 1 2208400000000000,0000000000000009 => 0 +vcntmbb 1 0000000000000009,ffff000180000001 => 300000000000000 +vcntmbb 1 ffff000180000001,0000000000000000 => 300000000000000 +vcntmbb 1 0000000000000000,8000000000000000 => 100000000000000 +vcntmbb 1 8000000000000000,7f800000ff800000 => 400000000000000 + +vcntmbd 0 7f800000ff800000,ff8000007f800000 => 800000000000000 +vcntmbd 0 ff8000007f800000,ff7ffffe7f7ffffe => 0 +vcntmbd 0 ff7ffffe7f7ffffe,0080000e8080000e => 800000000000000 +vcntmbd 0 0080000e8080000e,0180055e0180077e => 1000000000000000 +vcntmbd 0 0180055e0180077e,0000111e8000222e => 1000000000000000 +vcntmbd 0 0000111e8000222e,7ff0000000000000 => 1000000000000000 +vcntmbd 0 7ff0000000000000,fff0000000000000 => 800000000000000 +vcntmbd 0 fff0000000000000,2208400000000000 => 800000000000000 +vcntmbd 0 2208400000000000,0000000000000009 => 1000000000000000 +vcntmbd 0 
0000000000000009,ffff000180000001 => 800000000000000 +vcntmbd 0 ffff000180000001,0000000000000000 => 800000000000000 +vcntmbd 0 0000000000000000,8000000000000000 => 800000000000000 +vcntmbd 0 8000000000000000,7f800000ff800000 => 800000000000000 + +vcntmbd 1 7f800000ff800000,ff8000007f800000 => 800000000000000 +vcntmbd 1 ff8000007f800000,ff7ffffe7f7ffffe => 1000000000000000 +vcntmbd 1 ff7ffffe7f7ffffe,0080000e8080000e => 800000000000000 +vcntmbd 1 0080000e8080000e,0180055e0180077e => 0 +vcntmbd 1 0180055e0180077e,0000111e8000222e => 0 +vcntmbd 1 0000111e8000222e,7ff0000000000000 => 0 +vcntmbd 1 7ff0000000000000,fff0000000000000 => 800000000000000 +vcntmbd 1 fff0000000000000,2208400000000000 => 800000000000000 +vcntmbd 1 2208400000000000,0000000000000009 => 0 +vcntmbd 1 0000000000000009,ffff000180000001 => 800000000000000 +vcntmbd 1 ffff000180000001,0000000000000000 => 800000000000000 +vcntmbd 1 0000000000000000,8000000000000000 => 800000000000000 +vcntmbd 1 8000000000000000,7f800000ff800000 => 800000000000000 + +vcntmbh 0 7f800000ff800000,ff8000007f800000 => c00000000000000 +vcntmbh 0 ff8000007f800000,ff7ffffe7f7ffffe => 800000000000000 +vcntmbh 0 ff7ffffe7f7ffffe,0080000e8080000e => 800000000000000 +vcntmbh 0 0080000e8080000e,0180055e0180077e => e00000000000000 +vcntmbh 0 0180055e0180077e,0000111e8000222e => e00000000000000 +vcntmbh 0 0000111e8000222e,7ff0000000000000 => e00000000000000 +vcntmbh 0 7ff0000000000000,fff0000000000000 => e00000000000000 +vcntmbh 0 fff0000000000000,2208400000000000 => e00000000000000 +vcntmbh 0 2208400000000000,0000000000000009 => 1000000000000000 +vcntmbh 0 0000000000000009,ffff000180000001 => c00000000000000 +vcntmbh 0 ffff000180000001,0000000000000000 => c00000000000000 +vcntmbh 0 0000000000000000,8000000000000000 => e00000000000000 +vcntmbh 0 8000000000000000,7f800000ff800000 => c00000000000000 + +vcntmbh 1 7f800000ff800000,ff8000007f800000 => 400000000000000 +vcntmbh 1 ff8000007f800000,ff7ffffe7f7ffffe => 800000000000000 +vcntmbh 1 
ff7ffffe7f7ffffe,0080000e8080000e => 800000000000000 +vcntmbh 1 0080000e8080000e,0180055e0180077e => 200000000000000 +vcntmbh 1 0180055e0180077e,0000111e8000222e => 200000000000000 +vcntmbh 1 0000111e8000222e,7ff0000000000000 => 200000000000000 +vcntmbh 1 7ff0000000000000,fff0000000000000 => 200000000000000 +vcntmbh 1 fff0000000000000,2208400000000000 => 200000000000000 +vcntmbh 1 2208400000000000,0000000000000009 => 0 +vcntmbh 1 0000000000000009,ffff000180000001 => 400000000000000 +vcntmbh 1 ffff000180000001,0000000000000000 => 400000000000000 +vcntmbh 1 0000000000000000,8000000000000000 => 200000000000000 +vcntmbh 1 8000000000000000,7f800000ff800000 => 400000000000000 + +vcntmbw 0 7f800000ff800000,ff8000007f800000 => 800000000000000 +vcntmbw 0 ff8000007f800000,ff7ffffe7f7ffffe => 800000000000000 +vcntmbw 0 ff7ffffe7f7ffffe,0080000e8080000e => 800000000000000 +vcntmbw 0 0080000e8080000e,0180055e0180077e => c00000000000000 +vcntmbw 0 0180055e0180077e,0000111e8000222e => c00000000000000 +vcntmbw 0 0000111e8000222e,7ff0000000000000 => c00000000000000 +vcntmbw 0 7ff0000000000000,fff0000000000000 => c00000000000000 +vcntmbw 0 fff0000000000000,2208400000000000 => c00000000000000 +vcntmbw 0 2208400000000000,0000000000000009 => 1000000000000000 +vcntmbw 0 0000000000000009,ffff000180000001 => 800000000000000 +vcntmbw 0 ffff000180000001,0000000000000000 => 800000000000000 +vcntmbw 0 0000000000000000,8000000000000000 => c00000000000000 +vcntmbw 0 8000000000000000,7f800000ff800000 => 800000000000000 + +vcntmbw 1 7f800000ff800000,ff8000007f800000 => 800000000000000 +vcntmbw 1 ff8000007f800000,ff7ffffe7f7ffffe => 800000000000000 +vcntmbw 1 ff7ffffe7f7ffffe,0080000e8080000e => 800000000000000 +vcntmbw 1 0080000e8080000e,0180055e0180077e => 400000000000000 +vcntmbw 1 0180055e0180077e,0000111e8000222e => 400000000000000 +vcntmbw 1 0000111e8000222e,7ff0000000000000 => 400000000000000 +vcntmbw 1 7ff0000000000000,fff0000000000000 => 400000000000000 +vcntmbw 1 
fff0000000000000,2208400000000000 => 400000000000000 +vcntmbw 1 2208400000000000,0000000000000009 => 0 +vcntmbw 1 0000000000000009,ffff000180000001 => 800000000000000 +vcntmbw 1 ffff000180000001,0000000000000000 => 800000000000000 +vcntmbw 1 0000000000000000,8000000000000000 => 400000000000000 +vcntmbw 1 8000000000000000,7f800000ff800000 => 800000000000000 + +vextractbm 7f800000ff800000,ff8000007f800000 => c44c +vextractbm ff8000007f800000,ff7ffffe7f7ffffe => b3c4 +vextractbm ff7ffffe7f7ffffe,0080000e8080000e => 4cb3 +vextractbm 0080000e8080000e,0180055e0180077e => 444c +vextractbm 0180055e0180077e,0000111e8000222e => 844 +vextractbm 0000111e8000222e,7ff0000000000000 => 4008 +vextractbm 7ff0000000000000,fff0000000000000 => c040 +vextractbm fff0000000000000,2208400000000000 => c0 +vextractbm 2208400000000000,0000000000000009 => 0 +vextractbm 0000000000000009,ffff000180000001 => c800 +vextractbm ffff000180000001,0000000000000000 => c8 +vextractbm 0000000000000000,8000000000000000 => 8000 +vextractbm 8000000000000000,7f800000ff800000 => 4c80 + +vextractdm 7f800000ff800000,ff8000007f800000 => 2 +vextractdm ff8000007f800000,ff7ffffe7f7ffffe => 3 +vextractdm ff7ffffe7f7ffffe,0080000e8080000e => 1 +vextractdm 0080000e8080000e,0180055e0180077e => 0 +vextractdm 0180055e0180077e,0000111e8000222e => 0 +vextractdm 0000111e8000222e,7ff0000000000000 => 0 +vextractdm 7ff0000000000000,fff0000000000000 => 2 +vextractdm fff0000000000000,2208400000000000 => 1 +vextractdm 2208400000000000,0000000000000009 => 0 +vextractdm 0000000000000009,ffff000180000001 => 2 +vextractdm ffff000180000001,0000000000000000 => 1 +vextractdm 0000000000000000,8000000000000000 => 2 +vextractdm 8000000000000000,7f800000ff800000 => 1 + +vextracthm 7f800000ff800000,ff8000007f800000 => 82 +vextracthm ff8000007f800000,ff7ffffe7f7ffffe => d8 +vextracthm ff7ffffe7f7ffffe,0080000e8080000e => 2d +vextracthm 0080000e8080000e,0180055e0180077e => 2 +vextracthm 0180055e0180077e,0000111e8000222e => 20 +vextracthm 
0000111e8000222e,7ff0000000000000 => 2 +vextracthm 7ff0000000000000,fff0000000000000 => 80 +vextracthm fff0000000000000,2208400000000000 => 8 +vextracthm 2208400000000000,0000000000000009 => 0 +vextracthm 0000000000000009,ffff000180000001 => a0 +vextracthm ffff000180000001,0000000000000000 => a +vextracthm 0000000000000000,8000000000000000 => 80 +vextracthm 8000000000000000,7f800000ff800000 => 28 + +vextractqm 7f800000ff800000,ff8000007f800000 => 1 +vextractqm ff8000007f800000,ff7ffffe7f7ffffe => 1 +vextractqm ff7ffffe7f7ffffe,0080000e8080000e => 0 +vextractqm 0080000e8080000e,0180055e0180077e => 0 +vextractqm 0180055e0180077e,0000111e8000222e => 0 +vextractqm 0000111e8000222e,7ff0000000000000 => 0 +vextractqm 7ff0000000000000,fff0000000000000 => 1 +vextractqm fff0000000000000,2208400000000000 => 0 +vextractqm 2208400000000000,0000000000000009 => 0 +vextractqm 0000000000000009,ffff000180000001 => 1 +vextractqm ffff000180000001,0000000000000000 => 0 +vextractqm 0000000000000000,8000000000000000 => 1 +vextractqm 8000000000000000,7f800000ff800000 => 0 + +vextractwm 7f800000ff800000,ff8000007f800000 => 9 +vextractwm ff8000007f800000,ff7ffffe7f7ffffe => a +vextractwm ff7ffffe7f7ffffe,0080000e8080000e => 6 +vextractwm 0080000e8080000e,0180055e0180077e => 1 +vextractwm 0180055e0180077e,0000111e8000222e => 4 +vextractwm 0000111e8000222e,7ff0000000000000 => 1 +vextractwm 7ff0000000000000,fff0000000000000 => 8 +vextractwm fff0000000000000,2208400000000000 => 2 +vextractwm 2208400000000000,0000000000000009 => 0 +vextractwm 0000000000000009,ffff000180000001 => c +vextractwm ffff000180000001,0000000000000000 => 3 +vextractwm 0000000000000000,8000000000000000 => 8 +vextractwm 8000000000000000,7f800000ff800000 => 6 + vgnb 2 7f800000ff800000,ff8000007f800000 => f80078007800f800 vgnb 2 ff8000007f800000,ff7ffffe7f7ffffe => f7ff77fff8007800 vgnb 2 ff7ffffe7f7ffffe,0080000e8080000e => 8038803f7ff77ff @@ -605,4 +787,4 @@ vgnb 7 ffff000180000001,0000000000000000 => 30000000000000 vgnb 7 
0000000000000000,8000000000000000 => 8000000000000000 vgnb 7 8000000000000000,7f800000ff800000 => 4400000000000000 -All done. Tested 160 different instruction groups +All done. Tested 173 different instruction groups diff --git a/none/tests/ppc64/test_isa_3_1_VRT.c b/none/tests/ppc64/test_isa_3_1_VRT.c index f5f5536d8e..fb5d3d10f1 100644 --- a/none/tests/ppc64/test_isa_3_1_VRT.c +++ b/none/tests/ppc64/test_isa_3_1_VRT.c @@ -435,6 +435,45 @@ static void test_xsmincqp (void) { __asm__ __volatile__ ("xsmincqp %0, %1, %2" : "=v" (vrt) : "v" (vra), "v" (vrb) ); } +static void test_mtvsrbm (void) { + __asm__ __volatile__ ("mtvsrbm %0, %1" : "=v" (vrt) : "r" (rb) ); +} +static void test_mtvsrhm (void) { + __asm__ __volatile__ ("mtvsrhm %0, %1" : "=v" (vrt) : "r" (rb) ); +} +static void test_mtvsrwm (void) { + __asm__ __volatile__ ("mtvsrwm %0, %1" : "=v" (vrt) : "r" (rb) ); +} +static void test_mtvsrdm (void) { + __asm__ __volatile__ ("mtvsrdm %0, %1" : "=v" (vrt) : "r" (rb) ); +} +static void test_mtvsrqm (void) { + __asm__ __volatile__ ("mtvsrqm %0, %1" : "=v" (vrt) : "r" (rb) ); +} +static void test_mtvsrbmi_0 (void) { + __asm__ __volatile__ ("mtvsrbmi %0, 0" : "=v" (vrt) ); +} +static void test_mtvsrbmi_3 (void) { + __asm__ __volatile__ ("mtvsrbmi %0, 3" : "=v" (vrt) ); +} +static void test_mtvsrbmi_7 (void) { + __asm__ __volatile__ ("mtvsrbmi %0, 7" : "=v" (vrt) ); +} +static void test_vexpandbm (void) { + __asm__ __volatile__ ("vexpandbm %0, %1 " : "=v" (vrt) : "v" (vrb) ); +} +static void test_vexpandhm (void) { + __asm__ __volatile__ ("vexpandhm %0, %1 " : "=v" (vrt) : "v" (vrb) ); +} +static void test_vexpandwm (void) { + __asm__ __volatile__ ("vexpandwm %0, %1 " : "=v" (vrt) : "v" (vrb) ); +} +static void test_vexpanddm (void) { + __asm__ __volatile__ ("vexpanddm %0, %1 " : "=v" (vrt) : "v" (vrb) ); +} +static void test_vexpandqm (void) { + __asm__ __volatile__ ("vexpandqm %0, %1 " : "=v" (vrt) : "v" (vrb) ); +} static test_list_t testgroup_generic[] = { { 
&test_dcffixqq, "dcffixqq", "FRTp,VRB"}, /* bcs */ @@ -446,6 +485,14 @@ static test_list_t testgroup_generic[] = { { &test_dotted_vstribr, "vstribr.", "VRT,VRB"}, /* bcs */ { &test_dotted_vstrihl, "vstrihl.", "VRT,VRB"}, /* bcs */ { &test_dotted_vstrihr, "vstrihr.", "VRT,VRB"}, /* bcs */ + { &test_mtvsrbmi_0, "mtvsrbmi 0", "VRT,bm"}, /* bcwp */ + { &test_mtvsrbmi_3, "mtvsrbmi 3", "VRT,bm"}, /* bcwp */ + { &test_mtvsrbmi_7, "mtvsrbmi 7", "VRT,bm"}, /* bcwp */ + { &test_mtvsrbm, "mtvsrbm", "VRT,RB"}, /* bcs */ + { &test_mtvsrdm, "mtvsrdm", "VRT,RB"}, /* bcs */ + { &test_mtvsrhm, "mtvsrhm", "VRT,RB"}, /* bcs */ + { &test_mtvsrqm, "mtvsrqm", "VRT,RB"}, /* bcs */ + { &test_mtvsrwm, "mtvsrwm", "VRT,RB"}, /* bcs */ { &test_vcfuged, "vcfuged", "VRT,VRA,VRB"}, /* bcs */ { &test_vclrlb, "vclrlb", "VRT,VRA,RB"}, /* bcs */ { &test_vclrrb, "vclrrb", "VRT,VRA,RB"}, /* bcs */ @@ -466,6 +513,11 @@ static test_list_t testgroup_generic[] = { { &test_vdivud, "vdivud", "VRT,VRA,VRB"}, /* bcs */ { &test_vdivuq, "vdivuq", "VRT,VRA,VRB"}, /* bcs */ { &test_vdivuw, "vdivuw", "VRT,VRA,VRB"}, /* bcs */ + { &test_vexpandbm, "vexpandbm", "VRT,VRB"}, /* bcs */ + { &test_vexpanddm, "vexpanddm", "VRT,VRB"}, /* bcs */ + { &test_vexpandhm, "vexpandhm", "VRT,VRB"}, /* bcs */ + { &test_vexpandqm, "vexpandqm", "VRT,VRB"}, /* bcs */ + { &test_vexpandwm, "vexpandwm", "VRT,VRB"}, /* bcs */ { &test_vextddvlx, "vextddvlx", "VRT,VRA,VRB,RC"}, /* bcs */ { &test_vextddvrx, "vextddvrx", "VRT,VRA,VRB,RC"}, /* bcs */ { &test_vextdubvlx, "vextdubvlx", "VRT,VRA,VRB,RC"}, /* bcs */ diff --git a/none/tests/ppc64/test_isa_3_1_VRT.stdout.exp b/none/tests/ppc64/test_isa_3_1_VRT.stdout.exp index 70e8feeb1f..39c4d86197 100644 --- a/none/tests/ppc64/test_isa_3_1_VRT.stdout.exp +++ b/none/tests/ppc64/test_isa_3_1_VRT.stdout.exp @@ -586,6 +586,42 @@ vstrihr. ffff000180000001,0000000000000000 => [00000020]6:[0010] vstrihr. 0000000000000000,8000000000000000 => [00000020]6:[0010] 0,0000000000000000 vstrihr. 
8000000000000000,7f800000ff800000 => [00000020]6:[0010] 0,0000000000000000 +mtvsrbmi 0 => 0,0000000000000000 + +mtvsrbmi 3 => 0,000000000000ffff + +mtvsrbmi 7 => 0,0000000000ffffff + +mtvsrbm 0 => 0,0000000000000000 +mtvsrbm ffffffff => ffffffffffffffff,ffffffffffffffff +mtvsrbm ffffffff55555555 => ff00ff00ff00ff,00ff00ff00ff00ff +mtvsrbm 5555aaaaaaaa5555 => ff00ff00ff00ff,00ff00ff00ff00ff +mtvsrbm aaaa00000000aaaa => ff00ff00ff00ff00,ff00ff00ff00ff00 + +mtvsrdm 0 => 0,0000000000000000 +mtvsrdm ffffffff => ffffffffffffffff,ffffffffffffffff +mtvsrdm ffffffff55555555 => 0,ffffffffffffffff +mtvsrdm 5555aaaaaaaa5555 => 0,ffffffffffffffff +mtvsrdm aaaa00000000aaaa => ffffffffffffffff,0000000000000000 + +mtvsrhm 0 => 0,0000000000000000 +mtvsrhm ffffffff => ffffffffffffffff,ffffffffffffffff +mtvsrhm ffffffff55555555 => ffff0000ffff,0000ffff0000ffff +mtvsrhm 5555aaaaaaaa5555 => ffff0000ffff,0000ffff0000ffff +mtvsrhm aaaa00000000aaaa => ffff0000ffff0000,ffff0000ffff0000 + +mtvsrqm 0 => 0,0000000000000000 +mtvsrqm ffffffff => ffffffffffffffff,ffffffffffffffff +mtvsrqm ffffffff55555555 => ffffffffffffffff,ffffffffffffffff +mtvsrqm 5555aaaaaaaa5555 => ffffffffffffffff,ffffffffffffffff +mtvsrqm aaaa00000000aaaa => 0,0000000000000000 + +mtvsrwm 0 => 0,0000000000000000 +mtvsrwm ffffffff => ffffffffffffffff,ffffffffffffffff +mtvsrwm ffffffff55555555 => ffffffff,00000000ffffffff +mtvsrwm 5555aaaaaaaa5555 => ffffffff,00000000ffffffff +mtvsrwm aaaa00000000aaaa => ffffffff00000000,ffffffff00000000 + vcfuged 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => 1ffff,000000000001ffff vcfuged 7f800000ff800000,ff8000007f800000 ff8000007f800000,ff7ffffe7f7ffffe => 97f800001fc00000,000001000000ffff vcfuged 7f800000ff800000,ff8000007f800000 ff7ffffe7f7ffffe,0080000e8080000e => ff00000fe0000108,b3f800001fc00000 @@ -2508,6 +2544,76 @@ vdivuw 7f800000ff800000,ff8000007f800000 ff7ffffe7f7ffffe,0080000e8080000e => vdivuw 7f800000ff800000,ff8000007f800000 
0080000e8080000e,0180055e0180077e => aa00000054,000000fe00000001 vdivuw 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => eed3000000000,00000054000000aa +vexpandbm 7f800000ff800000,ff8000007f800000 => ffff000000ff0000,00ff0000ffff0000 +vexpandbm ff8000007f800000,ff7ffffe7f7ffffe => ff00ffff0000ffff,ffff000000ff0000 +vexpandbm ff7ffffe7f7ffffe,0080000e8080000e => ff0000ffff0000,ff00ffff0000ffff +vexpandbm 0080000e8080000e,0180055e0180077e => ff000000ff0000,00ff0000ffff0000 +vexpandbm 0180055e0180077e,0000111e8000222e => ff000000,00ff000000ff0000 +vexpandbm 0000111e8000222e,7ff0000000000000 => ff000000000000,00000000ff000000 +vexpandbm 7ff0000000000000,fff0000000000000 => ffff000000000000,00ff000000000000 +vexpandbm fff0000000000000,2208400000000000 => 0,ffff000000000000 +vexpandbm 2208400000000000,0000000000000009 => 0,0000000000000000 +vexpandbm 0000000000000009,ffff000180000001 => ffff0000ff000000,0000000000000000 +vexpandbm ffff000180000001,0000000000000000 => 0,ffff0000ff000000 +vexpandbm 0000000000000000,8000000000000000 => ff00000000000000,0000000000000000 +vexpandbm 8000000000000000,7f800000ff800000 => ff0000ffff0000,ff00000000000000 + +vexpanddm 7f800000ff800000,ff8000007f800000 => ffffffffffffffff,0000000000000000 +vexpanddm ff8000007f800000,ff7ffffe7f7ffffe => ffffffffffffffff,ffffffffffffffff +vexpanddm ff7ffffe7f7ffffe,0080000e8080000e => 0,ffffffffffffffff +vexpanddm 0080000e8080000e,0180055e0180077e => 0,0000000000000000 +vexpanddm 0180055e0180077e,0000111e8000222e => 0,0000000000000000 +vexpanddm 0000111e8000222e,7ff0000000000000 => 0,0000000000000000 +vexpanddm 7ff0000000000000,fff0000000000000 => ffffffffffffffff,0000000000000000 +vexpanddm fff0000000000000,2208400000000000 => 0,ffffffffffffffff +vexpanddm 2208400000000000,0000000000000009 => 0,0000000000000000 +vexpanddm 0000000000000009,ffff000180000001 => ffffffffffffffff,0000000000000000 +vexpanddm ffff000180000001,0000000000000000 => 0,ffffffffffffffff +vexpanddm 
0000000000000000,8000000000000000 => ffffffffffffffff,0000000000000000 +vexpanddm 8000000000000000,7f800000ff800000 => 0,ffffffffffffffff + +vexpandhm 7f800000ff800000,ff8000007f800000 => ffff000000000000,00000000ffff0000 +vexpandhm ff8000007f800000,ff7ffffe7f7ffffe => ffffffff0000ffff,ffff000000000000 +vexpandhm ff7ffffe7f7ffffe,0080000e8080000e => ffff0000,ffffffff0000ffff +vexpandhm 0080000e8080000e,0180055e0180077e => 0,00000000ffff0000 +vexpandhm 0180055e0180077e,0000111e8000222e => ffff0000,0000000000000000 +vexpandhm 0000111e8000222e,7ff0000000000000 => 0,00000000ffff0000 +vexpandhm 7ff0000000000000,fff0000000000000 => ffff000000000000,0000000000000000 +vexpandhm fff0000000000000,2208400000000000 => 0,ffff000000000000 +vexpandhm 2208400000000000,0000000000000009 => 0,0000000000000000 +vexpandhm 0000000000000009,ffff000180000001 => ffff0000ffff0000,0000000000000000 +vexpandhm ffff000180000001,0000000000000000 => 0,ffff0000ffff0000 +vexpandhm 0000000000000000,8000000000000000 => ffff000000000000,0000000000000000 +vexpandhm 8000000000000000,7f800000ff800000 => ffff0000,ffff000000000000 + +vexpandqm 7f800000ff800000,ff8000007f800000 => ffffffffffffffff,ffffffffffffffff +vexpandqm ff8000007f800000,ff7ffffe7f7ffffe => ffffffffffffffff,ffffffffffffffff +vexpandqm ff7ffffe7f7ffffe,0080000e8080000e => 0,0000000000000000 +vexpandqm 0080000e8080000e,0180055e0180077e => 0,0000000000000000 +vexpandqm 0180055e0180077e,0000111e8000222e => 0,0000000000000000 +vexpandqm 0000111e8000222e,7ff0000000000000 => 0,0000000000000000 +vexpandqm 7ff0000000000000,fff0000000000000 => ffffffffffffffff,ffffffffffffffff +vexpandqm fff0000000000000,2208400000000000 => 0,0000000000000000 +vexpandqm 2208400000000000,0000000000000009 => 0,0000000000000000 +vexpandqm 0000000000000009,ffff000180000001 => ffffffffffffffff,ffffffffffffffff +vexpandqm ffff000180000001,0000000000000000 => 0,0000000000000000 +vexpandqm 0000000000000000,8000000000000000 => ffffffffffffffff,ffffffffffffffff +vexpandqm 
8000000000000000,7f800000ff800000 => 0,0000000000000000 + +vexpandwm 7f800000ff800000,ff8000007f800000 => ffffffff00000000,00000000ffffffff +vexpandwm ff8000007f800000,ff7ffffe7f7ffffe => ffffffff00000000,ffffffff00000000 +vexpandwm ff7ffffe7f7ffffe,0080000e8080000e => ffffffff,ffffffff00000000 +vexpandwm 0080000e8080000e,0180055e0180077e => 0,00000000ffffffff +vexpandwm 0180055e0180077e,0000111e8000222e => ffffffff,0000000000000000 +vexpandwm 0000111e8000222e,7ff0000000000000 => 0,00000000ffffffff +vexpandwm 7ff0000000000000,fff0000000000000 => ffffffff00000000,0000000000000000 +vexpandwm fff0000000000000,2208400000000000 => 0,ffffffff00000000 +vexpandwm 2208400000000000,0000000000000009 => 0,0000000000000000 +vexpandwm 0000000000000009,ffff000180000001 => ffffffffffffffff,0000000000000000 +vexpandwm ffff000180000001,0000000000000000 => 0,ffffffffffffffff +vexpandwm 0000000000000000,8000000000000000 => ffffffff00000000,0000000000000000 +vexpandwm 8000000000000000,7f800000ff800000 => ffffffff,ffffffff00000000 + vextddvlx 0 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ff8000007f800000,0000000000000000 vextddvlx 4 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => 7f8000007f800000,0000000000000000 vextddvlx 8 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => 7f800000ff800000,0000000000000000 @@ -12013,4 +12119,4 @@ xsmincqp 8000000000000000,7f800000ff800000 ffff000180000001,0000000000000000 => xsmincqp 8000000000000000,7f800000ff800000 0000000000000000,8000000000000000 => 8000000000000000,0000000000000000 xsmincqp 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => 7f800000ff800000,8000000000000000 -All done. Tested 97 different instruction groups +All done. Tested 110 different instruction groups |
|
From: Carl L. <ca...@so...> - 2021-03-04 19:29:46
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=82777ee4080a339fb16d970411b743598b00590c commit 82777ee4080a339fb16d970411b743598b00590c Author: Carl Love <ce...@us...> Date: Fri May 1 23:49:33 2020 -0500 ISA 3.1 VSX Mask Manipulation Operations Add support for: mtvsrbm: Move to VSR Byte Mask; mtvsrbmi: Move To VSR Byte Mask Immediate; mtvsrdm: Move to VSR Doubleword Mask; mtvsrhm: Move to VSR Halfword Mask; mtvsrqm: Move to VSR Quadword Mask; mtvsrwm: Move to VSR Word Mask; vcntmbb: Vector Count Mask Bits Byte; vcntmbd: Vector Count Mask Bits Doubleword; vcntmbh: Vector Count Mask Bits Halfword; vcntmbw: Vector Count Mask Bits Word; vexpandbm: Vector Expand Byte Mask; vexpanddm: Vector Expand Doubleword Mask; vexpandhm: Vector Expand Halfword Mask; vexpandqm: Vector Expand Quadword Mask; vexpandwm: Vector Expand Word Mask; vextractbm: Vector Extract Byte Mask; vextractdm: Vector Extract Doubleword Mask; vextracthm: Vector Extract Halfword Mask; vextractqm: Vector Extract Quadword Mask; vextractwm: Vector Extract Word Mask. Re-implemented the copy_MSB_bit_fields() function. It can be done similarly to the implementation of the vgnb instruction leveraging the clean helpers used for the vgnb instruction. Reimplemented the vexpandXm instructions eliminating the call to copy_MSB_bit_fields() and the need for the for(i = 0; i< max; i++) loop. Reimplemented the mtvsrXm instructions to remove the need for the for(i = 0; i< max; i++) loop. The computations for vexpandXm and mtvsrXm instructions can be done much more efficiently. 
Diff: --- VEX/priv/guest_ppc_toIR.c | 614 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 614 insertions(+) diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index 3b58f883e4..bcabf69dd7 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -31208,6 +31208,596 @@ static IRTemp _get_quad_modulo_or_carry(IRExpr * vecA, IRExpr * vecB, return carry; } +static IRExpr * copy_MSB_bit_fields ( IRExpr *src, UInt size, + const VexAbiInfo* vbi ) +{ + IRTemp src_hi = newTemp( Ity_I64 ); + IRTemp src_lo = newTemp( Ity_I64 ); + IRTemp ones_hi, ones_lo; + ULong extract_mask_hi, extract_mask_lo; + UInt num_bits; + + ones_hi = newTemp( Ity_I64 ); + ones_lo = newTemp( Ity_I64 ); + + /* Create 64-bit extract mask, with a 1 in the MSB for each vector element + size. */ + + switch (size) { + case 8: + extract_mask_hi = 0x8080808080808080ULL; + extract_mask_lo = 0x8080808080808080ULL; + num_bits = 8; + break; + + case 16: + extract_mask_hi = 0x8000800080008000ULL; + extract_mask_lo = 0x8000800080008000ULL; + num_bits = 4; + break; + + case 32: + extract_mask_hi = 0x8000000080000000ULL; + extract_mask_lo = 0x8000000080000000ULL; + num_bits = 2; + break; + + case 64: + extract_mask_hi = 0x8000000000000000ULL; + extract_mask_lo = 0x8000000000000000ULL; + num_bits = 1; + break; + + default: + /* unsupported element size */ + vassert(0); + } + + assign( src_hi, unop( Iop_V128HIto64, src ) ); + assign( src_lo, unop( Iop_V128to64, src ) ); + + assign( ones_hi, extract_bits_under_mask ( vbi, mkexpr( src_hi ), + mkU64( extract_mask_hi ), + mkU64( 1 ) ) ); + assign( ones_lo, extract_bits_under_mask ( vbi, mkexpr( src_lo ), + mkU64( extract_mask_lo ), + mkU64( 1 ) ) ); + + /* Concatenate the extracted bits from ones_hi and ones_lo and + store in GPR. 
Make sure the hi and low bits are left aligned per + IBM numbering */ + return binop( Iop_Or64, + binop( Iop_Shl64, + mkexpr( ones_hi ), + mkU8( num_bits ) ), + mkexpr( ones_lo ) ); +} + +static Bool dis_VSR_byte_mask ( UInt prefix, UInt theInstr, + const VexAbiInfo* vbi ) +{ + UChar RT_addr = ifieldRegDS(theInstr); + UChar B_addr = ifieldRegB(theInstr); + IRTemp src = newTemp(Ity_I64); + + UInt inst_select = IFIELD( theInstr, 16, 5); + IRTemp vRT = newTemp( Ity_V128 ); + UInt size; + ULong extract_mask, shift_by; + + + /* The various instructions handled by this function use bits[11:15] to + specify the instruction in addition to the opc1 (bits[0:5]) and opc2 + (bits21:31]). The exception is the mtvsrbmi which uses bits[11:15] + for part of the immediate value. Assign mtvsrbmi a unique inst_select + so it can be handled similarly to the other instructions. This helps + simplify the code control flow. */ + if (IFIELD(theInstr, 1, 5) == 0xA) //mtvsrbmi + inst_select = 0x9999; + + switch(inst_select) { + case 0x0: // vexpandbm + DIP("vexpandbm v%u,r%u\n", RT_addr, B_addr); + + extract_mask = 0x8080808080808080ULL; + shift_by = 0x0707070707070707ULL; + + /* Use extract mask to select the MSB from each byte field. Then + use the arithmetic right shift to replicate the MSB into each + bit of the element field. */ + assign( vRT, + binop( Iop_Sar8x16, + binop( Iop_AndV128, + getVReg(B_addr), + binop( Iop_64HLtoV128, mkU64( extract_mask ), + mkU64( extract_mask ) ) ), + binop( Iop_64HLtoV128, mkU64( shift_by ), + mkU64( shift_by ) ) ) ); + putVReg( RT_addr, mkexpr( vRT ) ); + return True; + + case 0x1: // vexpandhm + DIP("vexpandhm v%u,r%u\n", RT_addr, B_addr); + + extract_mask = 0x8000800080008000ULL; + shift_by = 0x000F000F000F000FULL; + + /* Use extract mask to select the MSB from each byte field. Then + use the arithmetic right shift to replicate the MSB into each + bit of the element field. 
*/ + assign( vRT, + binop( Iop_Sar16x8, + binop( Iop_AndV128, + getVReg(B_addr), + binop( Iop_64HLtoV128, mkU64( extract_mask ), + mkU64( extract_mask ) ) ), + binop( Iop_64HLtoV128, mkU64( shift_by ), + mkU64( shift_by ) ) ) ); + putVReg( RT_addr, mkexpr( vRT ) ); + return True; + + case 0x2: // vexpandwm + DIP("vexpandwm v%u,r%u\n", RT_addr, B_addr); + + extract_mask = 0x8000000080000000ULL; + shift_by = 0x0000001F0000001FULL; + + /* Use extract mask to select the MSB from each byte field. Then + use the arithmetic right shift to replicate the MSB into each + bit of the element field. */ + assign( vRT, + binop( Iop_Sar32x4, + binop( Iop_AndV128, + getVReg(B_addr), + binop( Iop_64HLtoV128, mkU64( extract_mask ), + mkU64( extract_mask ) ) ), + binop( Iop_64HLtoV128, mkU64( shift_by ), + mkU64( shift_by ) ) ) ); + putVReg( RT_addr, mkexpr( vRT ) ); + return True; + + case 0x3: // vexpanddm + DIP("vexpanddm v%u,r%u\n", RT_addr, B_addr); + extract_mask = 0x8000000080000000ULL; + shift_by = 0x000003F000003FULL; + + /* Use extract mask to select the MSB from each byte field. Then + use the arithmetic right shift to replicate the MSB into each + bit of the element field. 
*/ + assign( vRT, + binop( Iop_Sar64x2, + binop( Iop_AndV128, + getVReg(B_addr), + binop( Iop_64HLtoV128, mkU64( extract_mask ), + mkU64( extract_mask ) ) ), + binop( Iop_64HLtoV128, mkU64( shift_by ), + mkU64( shift_by ) ) ) ); + putVReg( RT_addr, mkexpr( vRT ) ); + return True; + + case 0x4: // vexpandqm + { + IRTemp ones = newTemp( Ity_I64 ); + DIP("vexpandqm v%u,r%u\n", RT_addr, B_addr); + + assign( src, binop( Iop_Shr64, + unop( Iop_V128HIto64, getVReg( B_addr) ), + mkU8( 63 ) ) ); + assign( ones, + unop( Iop_1Sto64, + binop( Iop_CmpEQ64, + mkU64( 1 ), + binop( Iop_And64, + mkU64( 0x1 ), + mkexpr( src ) ) ) ) ); + putVReg( RT_addr, binop( Iop_64HLtoV128, + mkexpr( ones ), mkexpr( ones ) ) ); + } + return True; + + case 0x8: // vextractbm + DIP("vextractbm v%u,r%u\n", RT_addr, B_addr); + size = 8; + putIReg( RT_addr, copy_MSB_bit_fields( getVReg( B_addr ), size, vbi ) ); + return True; + + case 0x9: // vextracthm + DIP("vextracthm v%u,r%u\n", RT_addr, B_addr); + size = 16; + putIReg( RT_addr, copy_MSB_bit_fields( getVReg( B_addr ), size, vbi ) ); + return True; + + case 0xA: // vextractwm + DIP("vextractwm v%u,r%u\n", RT_addr, B_addr ); + size = 32; + putIReg( RT_addr, copy_MSB_bit_fields( getVReg( B_addr ), size, vbi ) ); + return True; + + case 0xB: // vextractdm + DIP("vextractdm v%u,r%u\n", RT_addr, B_addr); + size = 64; + putIReg( RT_addr, copy_MSB_bit_fields( getVReg( B_addr ), size, vbi ) ); + return True; + + case 0xC: // vextractqm + DIP("vextractqm v%u,r%u\n", RT_addr, B_addr); + putIReg( RT_addr, binop( Iop_Shr64, + unop( Iop_V128HIto64, getVReg( B_addr ) ), + mkU8 (63) ) ); + return True; + + case 0x10: // mtvsrbm + { + IRTemp src_upper = newTemp(Ity_I32); + IRTemp src_upper2 = newTemp(Ity_I32); + IRTemp src_upper4 = newTemp(Ity_I32); + IRTemp src_lower = newTemp(Ity_I32); + IRTemp src_lower2 = newTemp(Ity_I32); + IRTemp src_lower4 = newTemp(Ity_I32); + IRTemp tmp128 = newTemp(Ity_V128); + + DIP("mtvsrbm v%u,r%u\n", RT_addr, B_addr); + + /* Copy the 
lower 8-bits of the 16 bit mask to lower 8 byte elements + and copy the upper 8-bits of the 16 bit mask to the upper 8 byte + elements. */ + assign( src_upper, binop( Iop_Shr32, + binop( Iop_And32, mkU32( 0xFF00 ), + unop ( Iop_64to32, + getIReg( B_addr ) ) ), + mkU8( 0x8 ) ) ); + assign( src_lower, binop( Iop_And32, mkU32( 0xFF ), + unop ( Iop_64to32, + getIReg( B_addr ) ) ) ); + + assign( src_upper2, + binop( Iop_Or32, mkexpr( src_upper ), + binop( Iop_Shl32, mkexpr( src_upper ), mkU8( 8 ) ) ) ); + + assign( src_upper4, + binop( Iop_Or32, mkexpr( src_upper2 ), + binop( Iop_Shl32, mkexpr( src_upper2 ), + mkU8( 16 ) ) ) ); + + assign( src_lower2, + binop( Iop_Or32, mkexpr( src_lower ), + binop( Iop_Shl32, mkexpr( src_lower ), mkU8( 8 ) ) ) ); + + assign( src_lower4, + binop( Iop_Or32, mkexpr( src_lower2 ), + binop( Iop_Shl32, mkexpr( src_lower2 ), + mkU8( 16 ) ) ) ); + + /* Shift the bits in each element so the bit corresponding to the + element position is in the MSB. */ + assign( tmp128, binop( Iop_Shl8x16, + binop( Iop_64HLtoV128, + binop( Iop_32HLto64, + mkexpr( src_upper4 ), + mkexpr( src_upper4 ) ), + binop( Iop_32HLto64, + mkexpr( src_lower4 ), + mkexpr( src_lower4 ) ) ), + binop( Iop_64HLtoV128, + mkU64( 0x0001020304050607ULL ), + mkU64( 0x0001020304050607ULL ) ) ) ); + /* Do an arithmetic shift to replicate MSB to all bit positions. 
*/ + assign( vRT, binop( Iop_Sar8x16, mkexpr( tmp128 ), + binop( Iop_64HLtoV128, + mkU64( 0x0707070707070707ULL ), + mkU64( 0x0707070707070707ULL ) ) ) ); + putVReg( RT_addr, mkexpr( vRT ) ); + return True; + } + + case 0x9999: // mtvsrbmi + { + ULong immediate16, immediate16_hi, immediate16_lo; + ULong immediate64_hi, immediate64_lo; + IRTemp tmp128 = newTemp(Ity_V128); + + DIP("mtvsrbmi v%u,r%u\n", RT_addr, B_addr); + + /* Replicate the immediate fields b0|b1|b2 to all 16 vector + elements */ + immediate16 = (IFIELD(theInstr, 0, 1) ) | //b2 bits[31] + (IFIELD(theInstr, 16, 5) << 1) | //b1 bits[11:15] + (IFIELD(theInstr, 6, 10) << 6 ); //b0 bits[16:25] + + immediate16_hi = (immediate16 >> 8) & 0xFF; + immediate16_lo = immediate16 & 0xFF; + + immediate64_hi = ((immediate16_hi << 32) | (immediate16_hi << 56) | + (immediate16_hi << 48) | (immediate16_hi << 40) | + (immediate16_hi << 32) | (immediate16_hi << 16) | + (immediate16_hi << 8) | immediate16_hi); + + immediate64_lo = ((immediate16_lo << 32) | (immediate16_lo << 56) | + (immediate16_lo << 48) | (immediate16_lo << 40) | + (immediate16_lo << 32) | (immediate16_lo << 16) | + (immediate16_lo << 8) | immediate16_lo); + + /* Shift the bits in each element so the bit corresponding to the + element position is in the MSB. */ + assign( tmp128, binop( Iop_Shl8x16, + binop( Iop_64HLtoV128, + mkU64( immediate64_hi ), + mkU64( immediate64_lo ) ), + binop( Iop_64HLtoV128, + mkU64( 0x0001020304050607ULL ), + mkU64( 0x0001020304050607ULL ) ) ) ); + /* Do an arithmetic shift to replicate MSB to all bit positions. 
*/ + assign( vRT, binop( Iop_Sar8x16, mkexpr( tmp128 ), + binop( Iop_64HLtoV128, + mkU64( 0x0707070707070707ULL ), + mkU64( 0x0707070707070707ULL ) ) ) ); + putVReg( RT_addr, mkexpr( vRT ) ); + return True; + } + + case 0x11: // mtvsrhm + { + DIP("mtvsrhm v%u,r%u\n", RT_addr, B_addr); + + IRTemp src2 = newTemp(Ity_I32); + IRTemp tmp128 = newTemp(Ity_V128); + + /* Copy the 16 bit mask to all eight of the 16-bit elements. */ + assign( src, binop( Iop_And32, mkU32( 0xFFFF ), + unop ( Iop_64to32, + getIReg( B_addr ) ) ) ); + + assign( src2, + binop( Iop_Or32, mkexpr( src ), + binop( Iop_Shl32, mkexpr( src ), mkU8( 16 ) ) ) ); + + /* Shift the bits in each element so the bit corresponding to the + element position is in the MSB. */ + assign( tmp128, binop( Iop_Shl16x8, + binop( Iop_64HLtoV128, + binop( Iop_32HLto64, + mkexpr( src2 ), + mkexpr( src2 ) ), + binop( Iop_32HLto64, + mkexpr( src2 ), + mkexpr( src2 ) ) ), + binop( Iop_64HLtoV128, + mkU64( 0x0000000100020003ULL ), + mkU64( 0x0004000500060007ULL ) ) ) ); + /* Do an arithmetic shift to replicate MSB to all bit positions. */ + assign( vRT, binop( Iop_Sar16x8, mkexpr( tmp128 ), + binop( Iop_64HLtoV128, + mkU64( 0x000F000F000F000FULL ), + mkU64( 0x000F000F000F000FULL ) ) ) ); + putVReg( RT_addr, mkexpr( vRT ) ); + return True; + } + + case 0x12: // mtvsrwm + { + IRTemp tmp128 = newTemp(Ity_V128); + IRTemp src32 = newTemp(Ity_I32); + + DIP("mtvsrwm v%u,r%u\n", RT_addr, B_addr); + + /* Copy the 32 bit mask to all four of the 32-bit elements. */ + assign( src32, binop( Iop_Shl32, + unop ( Iop_64to32, getIReg( B_addr ) ), + mkU8( 28 ) ) ); + + /* Shift the bits in each element so the bit corresponding to the + element position is in the MSB. 
*/ + assign( tmp128, binop( Iop_Shl32x4, + binop( Iop_64HLtoV128, + binop( Iop_32HLto64, + mkexpr( src32 ), + mkexpr( src32 ) ), + binop( Iop_32HLto64, + mkexpr( src32 ), + mkexpr( src32 ) ) ), + binop( Iop_64HLtoV128, + mkU64( 0x0000000000000001ULL ), + mkU64( 0x0000000200000003ULL ) ) ) ); + + /* Do an arithmetic shift to replicate MSB to all bit positions. */ + assign( vRT, binop( Iop_Sar32x4, mkexpr( tmp128 ), + binop( Iop_64HLtoV128, + mkU64( 0x0000001F0000001FULL ), + mkU64( 0x0000001F0000001FULL ) ) ) ); + putVReg( RT_addr, mkexpr( vRT ) ); + return True; + } + + case 0x13: // mtvsrdm + { + IRTemp tmp128 = newTemp(Ity_V128); + + DIP("mtvsrdm v%u,r%u\n", RT_addr, B_addr); + + /* Copy the 64 bit mask to both of the 64-bit elements. */ + assign( src, binop( Iop_Shl64, + getIReg( B_addr ), + mkU8( 62 ) ) ); + + /* Shift the bits in each element so the bit corresponding to the + element position is in the MSB. */ + assign( tmp128, binop( Iop_Shl64x2, + binop( Iop_64HLtoV128, + mkexpr( src ), + mkexpr( src ) ), + binop( Iop_64HLtoV128, + mkU64( 0x0000000000000000ULL ), + mkU64( 0x0000000000000001ULL ) ) ) ); + + /* Do an arithmetic shift to replicate MSB to all bit positions. 
*/ + assign( vRT, binop( Iop_Sar64x2, mkexpr( tmp128 ), + binop( Iop_64HLtoV128, + mkU64( 0x000000000000003FULL ), + mkU64( 0x000000000000003FULL ) ) ) ); + putVReg( RT_addr, mkexpr( vRT ) ); + return True; + } + + case 0x14: // mtvsrqm + { + IRTemp ones = newTemp( Ity_I64 ); + DIP("mtvsrqm v%u,r%u\n", RT_addr, B_addr); + + assign( src, getIReg( B_addr ) ); + assign( ones, + unop( Iop_1Sto64, + binop( Iop_CmpEQ64, + mkU64( 1 ), + binop( Iop_And64, + mkU64( 0x1 ), + mkexpr( src ) ) ) ) ); + putVReg( RT_addr, binop( Iop_64HLtoV128, + mkexpr( ones ), mkexpr( ones ) ) ); + return True; + } + + case 0x18: // vcntmbb MP=0 + case 0x19: // vcntmbb MP=1 + { + UInt MP = IFIELD(theInstr, 16, 1); // bits[15] IBM numbering + IRTemp bit_mask = newTemp(Ity_I64); + IRTemp bit_cnt = newTemp(Ity_I64); + + DIP("vcntmbb r%u,v%u,%u\n", RT_addr, B_addr, MP); + + size = 8; + assign( bit_mask, copy_MSB_bit_fields( getVReg( B_addr ), size, + vbi ) ); + + if ( MP == 1) { + assign( bit_cnt, binop( Iop_Shl64, + popcnt64( vbi, mkexpr( bit_mask ) ), + mkU8( 56 ) ) ); + + } else { + /* Need to complement the bit mask then count the ones. */ + assign( bit_cnt, + binop( Iop_Shl64, + popcnt64( vbi, + binop( Iop_And64, + mkU64( 0xFFFF ), + unop( Iop_Not64, + mkexpr( bit_mask ) ) ) ), + mkU8( 56 ) ) ); + } + putIReg( RT_addr, mkexpr( bit_cnt ) ); + return True; + } + + case 0x1A: // vcntmbh MP=0 + case 0x1B: // vcntmbh MP=1 + { + UInt MP = IFIELD(theInstr, 16, 1); // bits[15] IBM numbering + IRTemp bit_mask = newTemp(Ity_I64); + IRTemp bit_cnt = newTemp(Ity_I64); + + DIP("vcntmbh r%u,v%u,%u\n", RT_addr, B_addr, MP); + + size = 16; + assign( bit_mask, copy_MSB_bit_fields( getVReg( B_addr ), size, + vbi ) ); + + /* Result is in IBM bits [0:6] */ + if ( MP == 1) { + assign( bit_cnt, + binop( Iop_Shl64, + popcnt64( vbi, mkexpr( bit_mask ) ), + mkU8( 57 ) ) ); + + } else { + /* Need to complement the bit mask then count the ones. 
*/ + assign( bit_cnt, + binop( Iop_Shl64, + popcnt64( vbi, + binop( Iop_And64, + mkU64( 0xFF ), + unop( Iop_Not64, + mkexpr( bit_mask ) ) ) ), + mkU8( 57 ) ) ); + } + putIReg( RT_addr, mkexpr( bit_cnt ) ); + return True; + } + + case 0x1C: // vcntmbw MP=0 + case 0x1D: // vcntmbw MP=1 + { + UInt MP = IFIELD(theInstr, 16, 1); // bits[15] IBM numbering + IRTemp bit_mask = newTemp(Ity_I64); + IRTemp bit_cnt = newTemp(Ity_I64); + + DIP("vcntmbw r%u,v%u,%u\n", RT_addr, B_addr, MP); + + size = 32; + assign( bit_mask, copy_MSB_bit_fields( getVReg( B_addr ), size, + vbi) ); + + if ( MP == 1) { + assign( bit_cnt, + binop( Iop_Shl64, + popcnt64( vbi, mkexpr( bit_mask ) ), + mkU8( 58 ) ) ); + + } else { + /* Need to complement the bit mask then count the ones. */ + assign( bit_cnt, + binop( Iop_Shl64, + popcnt64( vbi, + binop( Iop_And64, + mkU64( 0xF ), + unop( Iop_Not64, + mkexpr( bit_mask ) ) ) ), + mkU8( 58 ) ) ); + } + putIReg( RT_addr, mkexpr( bit_cnt ) ); + return True; + } + + case 0x1E: // vcntmbd MP=0 + case 0x1F: // vcntmbd MP=1 + { + UInt MP = IFIELD(theInstr, 16, 1); // bits[15] IBM numbering + IRTemp bit_mask = newTemp(Ity_I64); + IRTemp bit_cnt = newTemp(Ity_I64); + + DIP("vcntmbd r%u,v%u,%u\n", RT_addr, B_addr, MP); + + size = 64; + assign( bit_mask, copy_MSB_bit_fields( getVReg( B_addr ), size, + vbi ) ); + + /* Result is in IBM bits [0:4] */ + if ( MP == 1) { + assign( bit_cnt, + binop( Iop_Shl64, + popcnt64( vbi, mkexpr( bit_mask ) ), + mkU8( 59 ) ) ); + + } else { + /* Need to complement the bit mask then count the ones. */ + assign( bit_cnt, + binop( Iop_Shl64, + popcnt64( vbi, + binop( Iop_And64, + mkU64( 0x3 ), + unop( Iop_Not64, + mkexpr( bit_mask ) ) ) ), + mkU8( 59 ) ) ); + } + putIReg( RT_addr, mkexpr( bit_cnt ) ); + return True; + } + + default: + /* Unkown opc2 value for the dis_VSR_byte_mask function. 
*/ + return False; + } +} static Bool dis_av_quad ( UInt prefix, UInt theInstr ) { @@ -36541,6 +37131,20 @@ DisResult disInstr_PPC_WRK ( case 0x04: /* AltiVec instructions */ + opc2 = IFIELD(theInstr, 1, 5); + switch (opc2) { + case 0xA: // mtvsrbmi + if (!allow_V) goto decode_noV; + if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; + if (dis_VSR_byte_mask( prefix, theInstr, abiinfo )) + goto decode_success; + goto decode_failure; + break; + + default: + break; // Fall through... + } + opc2 = IFIELD(theInstr, 0, 6); switch (opc2) { /* AV Mult-Add, Mult-Sum */ @@ -36899,6 +37503,16 @@ DisResult disInstr_PPC_WRK ( goto decode_success; goto decode_failure; + case 0x642: // mtvsrbm, mtvsrhm, mtvswm, mtvsdm, mtvsqm, mtvsrbmi + // vcntmbb, vcntmbh, vcntmbw, vcntmbd + // vexpandbm, vexpandhm, vexpandwm, vexpanddm, vexpandqm + // vextractbm, vextracthm, vextractwm, vextractdm, vextractqm + if (!allow_V) goto decode_noV; + if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; + if (dis_VSR_byte_mask( prefix, theInstr, abiinfo)) + goto decode_success; + goto decode_failure; + case 0x6C2: case 0x682: // vshasigmaw, vshasigmad if (!allow_isa_2_07) goto decode_noP8; if (dis_av_hash( prefix, theInstr )) goto decode_success; |
|
From: Mark W. <ma...@so...> - 2021-03-04 18:26:20
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=b92d30bb6de3aec40be9ad368f10f881e2b84ca7 commit b92d30bb6de3aec40be9ad368f10f881e2b84ca7 Author: Mark Wielaard <ma...@kl...> Date: Thu Mar 4 19:24:06 2021 +0100 arm64: Handle sp, lr, fp as DwReg in CfiExpr When copy_convert_CfiExpr_tree sees a DwReg on arm64 we simply call I_die_here; This causes an issue in the case we really do have to handle that case (see https://bugzilla.redhat.com/show_bug.cgi?id=1923493). Handle the stack pointer (sp), link register (x30) and frame pointer (x29), which we already keep in D3UnwindRegs, like we do for other architectures in evalCfiExpr and copy_convert_CfiExpr_tree. https://bugs.kde.org/show_bug.cgi?id=433898 Diff: --- NEWS | 1 + coregrind/m_debuginfo/d3basics.c | 1 + coregrind/m_debuginfo/debuginfo.c | 2 ++ coregrind/m_debuginfo/priv_storage.h | 2 ++ coregrind/m_debuginfo/readdwarf.c | 7 ++++++- coregrind/m_debuginfo/storage.c | 2 ++ 6 files changed, 14 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 8595a9034a..6cb270c1c4 100644 --- a/NEWS +++ b/NEWS @@ -152,6 +152,7 @@ where XXXXXX is the bug number as listed below. 433500 DRD regtest faulures when libstdc++ and libgcc debuginfo are installed 433629 valgrind/README has type "abd" instead of "and" 433641 Rust std::sys::unix::fs::try_statx Syscall param fstatat(file_name) +433898 arm64: Handle sp, lr, fp as DwReg in CfiExpr n-i-bz helgrind: If hg_cli__realloc fails, return NULL. 
diff --git a/coregrind/m_debuginfo/d3basics.c b/coregrind/m_debuginfo/d3basics.c index e1127ffe2f..1bc5f8f052 100644 --- a/coregrind/m_debuginfo/d3basics.c +++ b/coregrind/m_debuginfo/d3basics.c @@ -523,6 +523,7 @@ static Bool get_Dwarf_Reg( /*OUT*/Addr* a, Word regno, const RegSummary* regs ) if (regno == 30) { *a = regs->fp; return True; } # elif defined(VGP_arm64_linux) if (regno == 31) { *a = regs->sp; return True; } + if (regno == 29) { *a = regs->fp; return True; } # else # error "Unknown platform" # endif diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c index c4a5ea593a..bc2578b377 100644 --- a/coregrind/m_debuginfo/debuginfo.c +++ b/coregrind/m_debuginfo/debuginfo.c @@ -2874,7 +2874,9 @@ UWord evalCfiExpr ( const XArray* exprs, Int ix, # elif defined(VGA_ppc32) || defined(VGA_ppc64be) \ || defined(VGA_ppc64le) # elif defined(VGP_arm64_linux) + case Creg_ARM64_SP: return eec->uregs->sp; case Creg_ARM64_X30: return eec->uregs->x30; + case Creg_ARM64_X29: return eec->uregs->x29; # else # error "Unsupported arch" # endif diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h index 39456eccbd..ae44ca34e5 100644 --- a/coregrind/m_debuginfo/priv_storage.h +++ b/coregrind/m_debuginfo/priv_storage.h @@ -415,7 +415,9 @@ typedef Creg_ARM_R15, Creg_ARM_R14, Creg_ARM_R7, + Creg_ARM64_SP, Creg_ARM64_X30, + Creg_ARM64_X29, Creg_S390_IA, Creg_S390_SP, Creg_S390_FP, diff --git a/coregrind/m_debuginfo/readdwarf.c b/coregrind/m_debuginfo/readdwarf.c index 3996623edb..bcacca4cb9 100644 --- a/coregrind/m_debuginfo/readdwarf.c +++ b/coregrind/m_debuginfo/readdwarf.c @@ -2816,7 +2816,12 @@ static Int copy_convert_CfiExpr_tree ( XArray* dstxa, if (dwreg == srcuc->ra_reg) return ML_(CfiExpr_CfiReg)( dstxa, Creg_IA_IP ); # elif defined(VGA_arm64) - I_die_here; + if (dwreg == SP_REG) + return ML_(CfiExpr_CfiReg)( dstxa, Creg_ARM64_SP ); + if (dwreg == FP_REG) + return ML_(CfiExpr_CfiReg)( dstxa, Creg_ARM64_X29 ); + 
if (dwreg == srcuc->ra_reg) + return ML_(CfiExpr_CfiReg)( dstxa, Creg_ARM64_X30 ); # elif defined(VGA_ppc32) || defined(VGA_ppc64be) \ || defined(VGA_ppc64le) # else diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c index 8667d123ff..48a92b4029 100644 --- a/coregrind/m_debuginfo/storage.c +++ b/coregrind/m_debuginfo/storage.c @@ -1002,7 +1002,9 @@ static void ppCfiReg ( CfiReg reg ) case Creg_ARM_R15: VG_(printf)("R15"); break; case Creg_ARM_R14: VG_(printf)("R14"); break; case Creg_ARM_R7: VG_(printf)("R7"); break; + case Creg_ARM64_SP: VG_(printf)("SP"); break; case Creg_ARM64_X30: VG_(printf)("X30"); break; + case Creg_ARM64_X29: VG_(printf)("X29"); break; case Creg_MIPS_RA: VG_(printf)("RA"); break; case Creg_S390_IA: VG_(printf)("IA"); break; case Creg_S390_SP: VG_(printf)("SP"); break; |
|
From: Mark W. <ma...@kl...> - 2021-03-04 14:31:27
|
Hi Eyal, On Wed, 2021-03-03 at 10:59 -0700, Eyal Soha wrote: > On Wed, Mar 3, 2021 at 6:54 AM Mark Wielaard <ma...@kl...> wrote: > So I think your patch is correct. I just wonder if this should just > > impact clo_exit_on_first_error or also the other actions in > > do_actions_on_error. Does it make sense to invoke vgdb if the error > > doesn't count, or to generate a suppression? > > Good question. We could think about that and maybe adjust it, too. Or, we > could just make this change and then do the vgdb part in a future commit. > One advantage of doing it separately is that if we change our minds about > just one or the other then it will be easier to revert in the future. You might be right. For --errors-for-leak-kinds=xxx it is kind of obvious that you don't want exit_on_first_error to trigger. That is the whole point of the option. For suppression generation and vgdb invocation it might be debatable. If there are errors that are NOT in the errors-for-leak-kinds set, but are in the show-leak-kinds set (so ppError will be called, but with count_error set to False) the user might want to invoke vgdb or generate a suppression? I am not really sure they do. But maybe they do? If nobody else has an opinion then let's go with your variant. So, any opinions, anybody? Cheers, Mark |
|
From: Eyal S. <eya...@gm...> - 2021-03-04 06:41:08
|
From: eyal0 <109...@us...> This fixes https://bugs.kde.org/show_bug.cgi?id=432801 To test: ```sh make && perl tests/vg_regtest memcheck/tests/x86/pcmpgtd ``` --- memcheck/mc_translate.c | 115 +++++++++++++++++++++++++- memcheck/tests/x86/Makefile.am | 7 ++ memcheck/tests/x86/pcmpgtd | Bin 0 -> 19688 bytes memcheck/tests/x86/pcmpgtd.c | 25 ++++++ memcheck/tests/x86/pcmpgtd.stderr.exp | 10 +++ memcheck/tests/x86/pcmpgtd.vgtest | 2 + 6 files changed, 158 insertions(+), 1 deletion(-) create mode 100755 memcheck/tests/x86/pcmpgtd create mode 100644 memcheck/tests/x86/pcmpgtd.c create mode 100644 memcheck/tests/x86/pcmpgtd.stderr.exp create mode 100644 memcheck/tests/x86/pcmpgtd.vgtest diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c index 516988bdd..07f3c0f5f 100644 --- a/memcheck/mc_translate.c +++ b/memcheck/mc_translate.c @@ -1287,6 +1287,115 @@ static IRAtom* expensiveCmpEQorNE ( MCEnv* mce, return final_cast; } +/* Check if we can know, despite the uncertain bits, that xx is greater than yy. + Notice that it's xx > yy and not the other way around. pcmpgtd appears + reversed in gdb disassembly, with yy first and xx second and xx is the + target. + + We can combine xx and vxx to create two values: the largest that xx could + possibly be and the smallest that xx could possibly be. Likewise, we can do + the same for yy. We'll call those max_xx and min_xx and max_yy and min_yy. + + If max_yy is is not greater than min_xx then yy can't possibly be greater + than xx so we know our answer for sure. If min_yy is greater than max_xx + then yy is definitely greater than xx. For all other cases, we can't know. + + For unsigned it's easy to make the min and max: Just set the unknown bits to + all 0s or all 1s. For signed it's harder because having a 1 in the MSB makes + a number smaller! It's tricky because the first bit is the sign bit. We can + work around this by changing from 2's complement numbers to biased numbers. 
+ We just need to xor the MSB of the inputs with 1. Then we can treat the + values as if unsigned. + */ +static IRAtom* expensiveCmpGT ( MCEnv* mce, + unsigned int word_size, + Bool is_signed, + unsigned int count, + IRAtom* vxx, IRAtom* vyy, + IRAtom* xx, IRAtom* yy ) +{ + IROp opAND, opOR, opXOR, opNOT, opEQ, opSHL, opGT; + IRType ty; + + tl_assert(isShadowAtom(mce,vxx)); + tl_assert(isShadowAtom(mce,vyy)); + tl_assert(isOriginalAtom(mce,xx)); + tl_assert(isOriginalAtom(mce,yy)); + tl_assert(sameKindedAtoms(vxx,xx)); + tl_assert(sameKindedAtoms(vyy,yy)); + + switch (word_size * count) { + case 128: + ty = Ity_V128; + opAND = Iop_AndV128; + opOR = Iop_OrV128; + opXOR = Iop_XorV128; + opNOT = Iop_NotV128; + break; + default: + VG_(tool_panic)("expensiveCmpGT"); + } + if (word_size == 32 && count == 4) { + opEQ = Iop_CmpEQ32x4; + opSHL = Iop_ShlN32x4; + if (is_signed) { + opGT = Iop_CmpGT32Sx4; + } else { + opGT = Iop_CmpGT32Ux4; + } + } else { + VG_(tool_panic)("expensiveCmpGT"); + } + IRAtom *MSBs; + if (is_signed) { + IRAtom *const0 = mkV128(0); + IRAtom *all_ones = assignNew('V', mce, ty, binop(opEQ, const0, const0)); + MSBs = assignNew('V', mce, ty, binop(opSHL, all_ones, mkU8(31))); + xx = assignNew('V', mce, ty, binop(opXOR, xx, MSBs)); + yy = assignNew('V', mce, ty, binop(opXOR, yy, MSBs)); + // From here on out, we're dealing with biased integers instead of 2's + // complement. + } + IRAtom *not_vxx = assignNew('V', mce, ty, unop(opNOT, vxx)); + IRAtom *not_vyy = assignNew('V', mce, ty, unop(opNOT, vyy)); + IRAtom *max_xx = assignNew('V', mce, ty, binop(opOR, xx, vxx)); + IRAtom *min_xx = assignNew('V', mce, ty, binop(opAND, xx, not_vxx)); + IRAtom *max_yy = assignNew('V', mce, ty, binop(opOR, yy, vyy)); + IRAtom *min_yy = assignNew('V', mce, ty, binop(opAND, yy, not_vyy)); + if (is_signed) { + // Now unbias. 
+ max_xx = assignNew('V', mce, ty, binop(opXOR, max_xx, MSBs)); + min_xx = assignNew('V', mce, ty, binop(opXOR, min_xx, MSBs)); + max_yy = assignNew('V', mce, ty, binop(opXOR, max_yy, MSBs)); + min_yy = assignNew('V', mce, ty, binop(opXOR, min_yy, MSBs)); + } + IRAtom *min_xx_gt_max_yy = assignNew('V', mce, ty, binop(opGT, min_xx, max_yy)); + IRAtom *max_xx_gt_min_yy = assignNew('V', mce, ty, binop(opGT, max_xx, min_yy)); + // For each vector, if the value in the first operand is greater than the one + // in in the second operand, all bits are set to one. Otherwise, zero. + // + // If former is 1s then xx is definitely greater than yy. That's a defined + // value. + // + // If the latter is true then there could be a value of xx greater than yy, + // so it's undefined. And the inverse of that is that there cannot be a + // value of xx greater than yy, so the result is definitely false. That's a + // defined value, too. + // + // So the result is defined if: + // + // min_xx_gt_max_yy | ~max_xx_gt_min_yy + // + // Because defined in vbits is 0s and not1s, we need to invert that: + // + // ~(min_xx_gt_max_yy | ~max_xx_gt_min_yy) + // + // We can use DeMorgan's Law to simplify the above: + // + // ~min_xx_gt_max_yy & max_xx_gt_min_yy + IRAtom *not_min_xx_gt_max_yy = assignNew('V', mce, ty, unop(opNOT, min_xx_gt_max_yy)); + return assignNew('V', mce, ty, binop(opAND, not_min_xx_gt_max_yy, max_xx_gt_min_yy)); +} /* --------- Semi-accurate interpretation of CmpORD. 
--------- */ @@ -3947,9 +4056,13 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_PwExtUSMulQAdd8x16: return binary16Ix8(mce, vatom1, vatom2); - case Iop_Sub32x4: case Iop_CmpGT32Sx4: + return expensiveCmpGT(mce, 32, True /* signed */, + 4, vatom1, vatom2, atom1, atom2); case Iop_CmpGT32Ux4: + return expensiveCmpGT(mce, 32, False /* unsigned */, + 4, vatom1, vatom2, atom1, atom2); + case Iop_Sub32x4: case Iop_CmpEQ32x4: case Iop_QAdd32Sx4: case Iop_QAdd32Ux4: diff --git a/memcheck/tests/x86/Makefile.am b/memcheck/tests/x86/Makefile.am index 557de6b11..f5c04ee90 100644 --- a/memcheck/tests/x86/Makefile.am +++ b/memcheck/tests/x86/Makefile.am @@ -13,6 +13,7 @@ EXTRA_DIST = \ $(addsuffix .stderr.exp,$(INSN_TESTS)) \ $(addsuffix .stdout.exp,$(INSN_TESTS)) \ $(addsuffix .vgtest,$(INSN_TESTS)) \ + pcmpgtd.stderr.exp pcmpgtd.vgtest \ pushfpopf.stderr.exp pushfpopf.stdout.exp pushfpopf.vgtest \ pushfw_x86.vgtest pushfw_x86.stdout.exp pushfw_x86.stderr.exp \ pushpopmem.stderr.exp pushpopmem.stdout.exp pushpopmem.vgtest \ @@ -37,6 +38,7 @@ check_PROGRAMS = \ fprem \ fxsave \ more_x86_fp \ + pcmpgtd \ pushfpopf \ pushfw_x86 \ pushpopmem \ @@ -52,6 +54,11 @@ AM_CCASFLAGS += @FLAG_M32@ # fpeflags must use these flags -- bug only occurred with them. 
fpeflags_CFLAGS = $(AM_CFLAGS) -march=i686 + +# pcmpgtd failure only occurs with clang version >= 10 and -O2 +pcmpgtd_SOURCES = pcmpgtd.c +pcmpgtd_CFLAGS = -O2 + pushfpopf_SOURCES = pushfpopf_c.c pushfpopf_s.S if VGCONF_OS_IS_DARWIN pushpopmem_CFLAGS = $(AM_CFLAGS) -mdynamic-no-pic diff --git a/memcheck/tests/x86/pcmpgtd b/memcheck/tests/x86/pcmpgtd new file mode 100755 index 0000000000000000000000000000000000000000..87c0185f0909c5a1c8e6db6ece1dff78dae491a1 GIT binary patch literal 19688 zcmeHPe{dYteSdp<ds<8K-AVEf*(UhF25eKGBwH9^8~aX{>=g(j`~xt?UQYKzcldtg zZqHaGLrrDVTpLr9W~OdB0j8Z{+?oE!KdA`~VAn1-B%PYz2?RpN5JOE8u+sub3E}$r zzJ2d>x0WZ&q|@n4?`igZe}CWiefRC|d%O4a`-8*lU5dhFaj`Xws1bjNv+SC&MFX4^ zygt^#LTmxMnt8x+;|OsHK^v1W>9Pe9>OxNiB)h{xe%KWPMRbS&yMot~P)10Q>?TV{ zpf-F`2qk2GTTFHup@L?je-ge)0ij4J^{VX!xR|j($VLRbPXs1mtsUit1#NY<CS)T5 z_KALn1eA6}lAT}J`GvlOXGMDw%6w9{={O<$op6Of5gj7H26#OQNtepVJ64Qve}E{0 zp*pylQ;bO{?e2gb@1KzWHXzf!NwjylyfB#$0Y!INS2{JiqPHs@TbfQ~OM8~?>07aM zMQ^Z}3ohgRCjX?nVdGXF3Y|{riejFDgLovr<J~VlkUsfr)9fdwC$IbS_QziD82%c` zXnc?^;j&$zyhvZfmxmC2F3=7wI-I2ijH&onT!z0A`~Z$i3i1=BbgW=StB{UoSur&h ziCU>#mYF6A&7u`4SY{@Y%CZf^L;dT_Wx-{^8z3Fa<g!9)l3y49vs+NmTk5Yof2Fyp zX`~^C^~JRzu1I5n@+Ql#YWQ-@kgPKl;wnw3$*(Md8otc6lr>-@1C0zcGSJ9CBLj^L zG&0c0KqCW<4E)bzz?k^CZv3(KeiviL{-c(wa@v?Up&ze0uiWr9M3rmKq4v!W0V6s| z^7qbGP_B8MXqw8ucN%2R`$OfIcN*oljfry?N45-|JbJn##Eg^2@*JEDwZX^C-LU!4 zH-CV!<B6be{yyFur=RY_LcqA0YOCXE;!{}Do+n1-Y~tYYM6E%HJ;(JQ{(ysHpQ%Rq zqH*l}T0?okc=<D{t?HpkcyQY9y}1Xmu+r?UM){rd51|JyX!F5Qb{rS+)wlPvN9*aW zn~jHmewe~Muw|kDc^|NzkI(<`vr6UJR^SuAQhfV9Ms|AMs8jeh6u9hl$c(2Im+IC$ zO}bHbFHD>qSg229M)}{*KLM>51{Nxm1K+;CCDrnAgWK&V;=oAdJAmvTsWK)$QGJix z%avX>Cf@qgI57UHQU1(0_JPZoctI(DH1UpN?63ItJx|)@3&!GKK1YTL!<T&_;k)o6 zonPOPD|!>Y>?;Z1eLqO}BCq?7+}rD_VSstzT*8<5Ny1n7NoT@0`s0M}%xhpC1@oW4 ze4Ci3!F-ua-7q~4Uyw$Q62k+=_rQ1&3>@6oS(u$AvlB3b_Jud#>qlU|MNDX)c?liR zjfs;k-~R15ML+Y_2X5?t-@4Uz<jXwLS4HR)DID<@-j;!#A!oiLz0}7aPAVo|mkQ|k z8Fb^!v4k&wY9o+SNY6vxGj4jxnrTeDs2INIX0#g0E5^hz#klEI>4Nc{fBC*$jPh&7 
zFW)kb{Zv(caKQbw#mCEUmd_gxpP<pPeFyqQ<2`)WQ2EvHokQi1!&}4U--NdeAGl`E z62`U~2bL1tJiPcFx+c#@Kdn@ZW4}?Yc|C9V=Ck4QC&T5B2g(=1l^H)aCXS==H%mXI zYjyjM@V((3;hkX<BbL#X%dhFWs~F5iX=I?0fkp-z8E9mnk%2}A8X0J0ppk+9_Zd*| zEl;jK@nMfC^VC)Jt(Sj;zF4Uo0v-aq8!!(@OZDzwR4PXS@B7b6<s#rwz&=_@qR@9% zD$0YKn6k&O%xi7Z4=PQ5`hG~?G|yhBRIVoll&N*?Z;hmT4!XiETL`)vM;<(a%<Et8 z?Yza;@_@dVt)07S$?}C)6Z`jYP^?MBy2J$}?DdDeog2JIl?n9`mwKhk+sRqt!#S2@ z>?@@IAC<}uK-TZ||A}kR+xe(E=nWij4|*4URWrQZk9Z7k-$c^}Z|Ht+U)b9n_Acu8 z2Kv37(C_!^{QKx>nD2!j88f+Sltu;`8E9mnk%2}A8X0J0ppk(_1{xV?WZ?fw2D<S* zmKOo#+C@V8h{=oiz4UZJp26o_Dsy;^YE6h^^8_uHS8T4Ng`C!PRHWQ(lT24b9Q)|A zN{*0jLG#~igFYo_`J3YrK{s<N#)br>-_ug*v;mtbpaj+5sN#Dv6<Q)v;cH4jxhAFG zr&5`11D1D(I41caLCbzfds;(Nk?rFPF7Ho3=;P}prxja`VVTP5b|Dw+|1}_gSMF?Z zg*e~q1$4?ctXsD-uxKDYnu=rteZl3y?xiah+X#!MBiXUQ?s%byUmpj0x`X&z+WSyu zPtQ%)_XOBRoOAoM>pTmsY`*Zm7?6G-K!q0Xu0H~zTpUEL@@kl{$LSopgOF=(?GE6c zJ5e=}?jhGaeH-BYI9%7VX}W8^o&o+hL|@JHKLn&Nq^^Y+f$i{+f-hH4*KY<+Ex3BV zqOS*ji1@t+S@RJv_ux<r?YjUiJPxLvs^aR8fT{x&llE_@n%_gE!-e|g*1hUN{p%ok z?AJ2=n-X8-)}NC25?%j0iFdc^-v+)Ic~QUQpO#mb>-4eyJh{7p>93I<$yT#gUG3AU zy<d}TCDY#r?m-mlDmT+V1(7Ai0oLkQ*Xk;89z>Yw%@Xf->ob9OlF>Sy>1#mz6%KWn zj5bVqAFA<wOO4U3FzScFYh-jMYYnKkPa54(W3<Vw-wGWb!De0GN;tK;#jk%+;#)hJ z{ve2Js6Q!kwoTs$nlh_yXZlxx-$nQiU4I<-ZKRj<PxGrg^`}7ZAUaNwNAzz=yC~C- zNIZsU>1P8w$;lWJr2i|ZuajAx%+fmbV}ztxrk@4Q<ITbBT@cTb;y5W<`UjwST)Ub6 zUsCY_U3WpJjU4Qu+vaq#_kIZ870h>n8f<DN);l=dhnh+8LqZR_sn1RB<^|v^hdb>` ze<y&hb?WnlcWo}yX;e_F8t$=e%@pVuIPPz_cLHd-`w#&a_j`f_i<>RzEQ4>4-n!SL z>2rbZ2g`$-*!0JM@ZkK+9iaAg?VDh=YjiPn)Lw<kU7NU^VxQr9%zqbA&7=m3Z6~Td zhzpwEYH6QOQ%9J{d!VtK$eHUw&U#8**nff8W-fp%L$X;UW3!i_x<@E*;!A>H(^o=) z2inZsQJ8oX55?Y0RSvWsWJ;TIjkiNnz5dzrylqOmhc0SG^{#AL)pFgOp^nva*5Wy( zYHX+jd0EI*@0yktJSj9E=XkE=ps52UEiGgOuhXG5)Imfa$Z4|`DQTVsZQD2Q2Q!uH z;JUS=DxR(=)Qa~iXx#`)A2rv~vIeMsc1;6qK6JT#CL{BXAbQa533dKi{~X)%PzOak zqnEilB?DOlIeU_v)xxciz49L@tf!+5iDl2znP(bmMNxk3olb!G#ONv&3tg#fG+m0t zSuN3p-8ANkUa_@iEL-XtO<Bdyq0(iI=i@>VdrZ30>D`&7J>9ffr;9%Yy6n4uwN=`c 
zl8vN;NeZaCLj@#GB*;`YkuxojIJexjEip~Vc1O}u#OY$(l5{?r$&XpFU=$t2_Lo>J zo`4|@EO!%|=8}VXuGZB2nnx`?qb*Qd?$(-CtEGfT^;+5j&oe|75}NK&+aK2guWJ5n zns$%Y`k1Dz(tNI+9a^i$wX9w9-J-SXHgGMMq?*!KYd&?h9@JW0*9z`4n&uhusQ2p$ z&F4{P!_ITJcC}}fHrI2Drma@*4-&ugaSeCvBwQ*UbxzRLiu3hb9-ot)?Mmh{@ve9= zmyC4f3#r`^E8c}HjTKVaSXU;Vi6-OGU0qhZXcg%UW2a9w<7P6FjiuuSR*Y0pE*5;# zB1u&p=Z1)Ej*xhvU?b5S#*t~}Be8A<aUvB@$BK+-aKoE6g}0fThwd3Pw`?03G)-pi zN~P1}AfF-y)YMAKEO8PwNYhGXD5?~Y7=aw75rq=%W}*PlJ76>tQdzPkEN>O;o)G~L z1sqU~Cez#jc0F08BH5S7H%sR*Hz8}&jHYwZU3LsbE+e}{q?G1iMPe}?2thMl9A(91 zj(m^iN?GbA=Z|L5m2~a_#B7%Gc>?)e@o_jWp^fFsC?Gh^=j@)3BI^`N0#;xb$KfX$ zNvF9pu*Qo-!FM5=gr2w_=u~7P#a-}93E({_Wg*VSW1=hMjIIY#O{61Z=y1d=;LPU= zh>7p{!e%g*k62c`kY&+iq`>mDODu(E9*n~<nP(BYd{Df&7^x_1#6~9Tgg`ISJq5+Z zx}W$+;7ZkOlP#RAKcCs=NrTUYsfh5fD|Csjim*;h$yeFADfyT(sZ;sUs_T&2MpS$q zlk1Oqep7XwQqR}fRP%e4O=EIhP_N(2<ocqX-%?$N)blajm|~u;vgs@^CEr`!-&^m; z$K<-Mp6_SXeXlhFm9<s(i`KAl^KNG&flw`r<0`|0QLF=Ng(_BE`jq^cY^wE%%4RXS z@37tvR%@Y9EsNtSR%45%<j-OByJ2ThtE<*}iOS^a*2%x3o*^ieec>|vD=)*B3!zCd zU0ZJUi^?R&#%O}+VqSL8aelpmf7HRhO7K}*2-sw~5vYPyyv&2Rmf3pFzx2G&YKq#e zzBdrA#<~8b=l2l!Q^kL|e*Go))6VX_Dg>;)Oy%cWT&3fZ@pBA(1yfSz_<2d_JICRh zoPX(fzR3MtI<7qEKs)k4ziy`@^Ogb<m}0!m;`&^*u38GdANn%?Qs^gr*QM8E?`8ZL zm*LYAf&4qizXg6kHdX^Cz;APPvTnz9agg}*l{*mP+BCVM@&x#l2l{zG75)t!@Vhwt z7*`>OpT8%4mzV7o;}aw=8R19%-+}7||0MY2$C;m3gua~H(dz;#w1%Vjo%(-G`j}oZ z$92J?1*>S4u&W@-V(~(JELFr4(zG%(JIG>gYnri~IhM|iVs;V3<Fsf-N_$u|m%-!H zipPRCE?-7_sHade6RB(pkMcrf+>B?f!Z=G5BAK`uD`he?L#UxJok>}Bs*|%k(_FtP zeA}QoxN(3cQT3{rM+|J+7`|<2UA=_w^#%*u-OWKmcrXSwF>}N4?fv0l^Y-=YHxF(x zw}ktL2gxhnAs#K3c+fQUu=kFKLUmik<FSYp5gW`MBIh1+r<g45P3CB=ZpS&2SImi7 zC^Ls{hgK|=HA}@fO%tnuQHq!uOJiDtc^l@s<jqP&F_ENM=xDJh-1FIvX>Pe~oyZvr z7RNJIWE9XU*f1$E%+lh8JPT%XRy-K)A6jZf#sr0RP;j)A!oM8AR5VCbGEz*kU~D`K ze>TM2lS{<Qt~$bm46#PYKwx=HErYxxK`XuoI1LfVay+lWcv1|CB&MEK%C@jahixHo zIF4jeQM8(aFH}L^fb9$kVL^=348|UUjgOYbFm@6-iAP393-R5O!hcnWOB}h8C^Btd zSHEjbPflEdcy`!D&fDbw|8PD*Nw>Bv0736wrM;Zr$^Ul|s8t3o?@yB51Ekho&I8X0 
zhYO{mfKGb@d<}<eU(OeG)izip?C|P=7hOQ<X8`oac-JfKyVVfa=oa>}|4w`1hHV6b zR|wVi<@}Rg15u%OlvJpXlsnmeCoxgzbq%S?_D_lSPYL@WQmQG={O^Q}-d#(3IiGD8 z9c&i1GJdHi;V#JOUAUy>yf+~11H#@JzhJWmfsm@Sm-FM0u<sU%vVCX&_X+!cp(o$x z=clm8)BckDJqjM>MEb8jNAO6bLN>KUxUI6vz-sLS;yDx$_A-8HAlUT8<%M1&*V@bH z<cLssSe!?pUbU0-HyrkIo__{YHY(F>P_3QuKM4u7FY_<w0W1I}3e7ufi_`uvgthjQ z|7XUwT3u@l+-d(04tu%27zx;hG+(MM^88EQQ6Lm!HBD<mpiqJ~aOz81!tX<k-q=Zd zxn9BBM_#0!O-+s;$){KMG<KxDoR`Y~^K(|jPvf78w3FxmE$~Q1+ROFN#V*pOIZ}O* z_7c7eajkuIoz!jHEvh%KwlD23IPB$oa*?pVT>H}PM?lDzj9<>r`h>k~N9v1oNRoX5 zjCex5%9r-5svH}U_7c(;5Q*2S7KOu2jsfS?^<BpPpL=a@Ee-<*$-ZzI`_nhr_QxFt z4w8QoBA&DVe@Y8Dl=13?Y17hPLRvRS+}5z`)|EDVtd<GfsY|q@32yPEu)j~-7_S1C op0)CP%I6Q=U*x&xhO!FV$T>%Y4iamRsqN2+8~R#@frDiK4ffTmCjbBd literal 0 HcmV?d00001 diff --git a/memcheck/tests/x86/pcmpgtd.c b/memcheck/tests/x86/pcmpgtd.c new file mode 100644 index 000000000..f9580dd6b --- /dev/null +++ b/memcheck/tests/x86/pcmpgtd.c @@ -0,0 +1,25 @@ +#include <signal.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + + +int main() +{ + struct sigaction act; + if (sigaction(SIGTERM, 0, &act) == 1) { + return 12; + } + if (sigaction(SIGTERM, 0, &act) == 1) { + return 12; + } + + char pattern[] = "\x1\x2\x3\x4\x5\x6\x7\x8\x9"; + const unsigned long plen = strlen(pattern); + pattern[1] = 0; + size_t hp=0; + for (size_t i = 0; i < plen; ++i) + hp += pattern[i]; + return hp % 10; +} diff --git a/memcheck/tests/x86/pcmpgtd.stderr.exp b/memcheck/tests/x86/pcmpgtd.stderr.exp new file mode 100644 index 000000000..eb42921c6 --- /dev/null +++ b/memcheck/tests/x86/pcmpgtd.stderr.exp @@ -0,0 +1,10 @@ + + +HEAP SUMMARY: + in use at exit: 0 bytes in 0 blocks + total heap usage: 0 allocs, 0 frees, 0 bytes allocated + +For a detailed leak analysis, rerun with: --leak-check=full + +For lists of detected and suppressed errors, rerun with: -s +ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) diff --git a/memcheck/tests/x86/pcmpgtd.vgtest 
b/memcheck/tests/x86/pcmpgtd.vgtest new file mode 100644 index 000000000..3b8416530 --- /dev/null +++ b/memcheck/tests/x86/pcmpgtd.vgtest @@ -0,0 +1,2 @@ +prog: pcmpgtd +prereq: test -e pcmpgtd -- 2.20.1 |