|
From: Eyal S. <eya...@gm...> - 2021-03-04 06:41:08
|
From: eyal0 <109...@us...> This fixes https://bugs.kde.org/show_bug.cgi?id=432801 To test: ```sh make && perl tests/vg_regtest memcheck/tests/x86/pcmpgtd ``` --- memcheck/mc_translate.c | 115 +++++++++++++++++++++++++- memcheck/tests/x86/Makefile.am | 7 ++ memcheck/tests/x86/pcmpgtd | Bin 0 -> 19688 bytes memcheck/tests/x86/pcmpgtd.c | 25 ++++++ memcheck/tests/x86/pcmpgtd.stderr.exp | 10 +++ memcheck/tests/x86/pcmpgtd.vgtest | 2 + 6 files changed, 158 insertions(+), 1 deletion(-) create mode 100755 memcheck/tests/x86/pcmpgtd create mode 100644 memcheck/tests/x86/pcmpgtd.c create mode 100644 memcheck/tests/x86/pcmpgtd.stderr.exp create mode 100644 memcheck/tests/x86/pcmpgtd.vgtest diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c index 516988bdd..07f3c0f5f 100644 --- a/memcheck/mc_translate.c +++ b/memcheck/mc_translate.c @@ -1287,6 +1287,115 @@ static IRAtom* expensiveCmpEQorNE ( MCEnv* mce, return final_cast; } +/* Check if we can know, despite the uncertain bits, that xx is greater than yy. + Notice that it's xx > yy and not the other way around. pcmpgtd appears + reversed in gdb disassembly, with yy first and xx second and xx is the + target. + + We can combine xx and vxx to create two values: the largest that xx could + possibly be and the smallest that xx could possibly be. Likewise, we can do + the same for yy. We'll call those max_xx and min_xx and max_yy and min_yy. + + If max_yy is is not greater than min_xx then yy can't possibly be greater + than xx so we know our answer for sure. If min_yy is greater than max_xx + then yy is definitely greater than xx. For all other cases, we can't know. + + For unsigned it's easy to make the min and max: Just set the unknown bits to + all 0s or all 1s. For signed it's harder because having a 1 in the MSB makes + a number smaller! It's tricky because the first bit is the sign bit. We can + work around this by changing from 2's complement numbers to biased numbers. + We just need to xor the MSB of the inputs with 1. Then we can treat the + values as if unsigned. + */ +static IRAtom* expensiveCmpGT ( MCEnv* mce, + unsigned int word_size, + Bool is_signed, + unsigned int count, + IRAtom* vxx, IRAtom* vyy, + IRAtom* xx, IRAtom* yy ) +{ + IROp opAND, opOR, opXOR, opNOT, opEQ, opSHL, opGT; + IRType ty; + + tl_assert(isShadowAtom(mce,vxx)); + tl_assert(isShadowAtom(mce,vyy)); + tl_assert(isOriginalAtom(mce,xx)); + tl_assert(isOriginalAtom(mce,yy)); + tl_assert(sameKindedAtoms(vxx,xx)); + tl_assert(sameKindedAtoms(vyy,yy)); + + switch (word_size * count) { + case 128: + ty = Ity_V128; + opAND = Iop_AndV128; + opOR = Iop_OrV128; + opXOR = Iop_XorV128; + opNOT = Iop_NotV128; + break; + default: + VG_(tool_panic)("expensiveCmpGT"); + } + if (word_size == 32 && count == 4) { + opEQ = Iop_CmpEQ32x4; + opSHL = Iop_ShlN32x4; + if (is_signed) { + opGT = Iop_CmpGT32Sx4; + } else { + opGT = Iop_CmpGT32Ux4; + } + } else { + VG_(tool_panic)("expensiveCmpGT"); + } + IRAtom *MSBs; + if (is_signed) { + IRAtom *const0 = mkV128(0); + IRAtom *all_ones = assignNew('V', mce, ty, binop(opEQ, const0, const0)); + MSBs = assignNew('V', mce, ty, binop(opSHL, all_ones, mkU8(31))); + xx = assignNew('V', mce, ty, binop(opXOR, xx, MSBs)); + yy = assignNew('V', mce, ty, binop(opXOR, yy, MSBs)); + // From here on out, we're dealing with biased integers instead of 2's + // complement. + } + IRAtom *not_vxx = assignNew('V', mce, ty, unop(opNOT, vxx)); + IRAtom *not_vyy = assignNew('V', mce, ty, unop(opNOT, vyy)); + IRAtom *max_xx = assignNew('V', mce, ty, binop(opOR, xx, vxx)); + IRAtom *min_xx = assignNew('V', mce, ty, binop(opAND, xx, not_vxx)); + IRAtom *max_yy = assignNew('V', mce, ty, binop(opOR, yy, vyy)); + IRAtom *min_yy = assignNew('V', mce, ty, binop(opAND, yy, not_vyy)); + if (is_signed) { + // Now unbias. + max_xx = assignNew('V', mce, ty, binop(opXOR, max_xx, MSBs)); + min_xx = assignNew('V', mce, ty, binop(opXOR, min_xx, MSBs)); + max_yy = assignNew('V', mce, ty, binop(opXOR, max_yy, MSBs)); + min_yy = assignNew('V', mce, ty, binop(opXOR, min_yy, MSBs)); + } + IRAtom *min_xx_gt_max_yy = assignNew('V', mce, ty, binop(opGT, min_xx, max_yy)); + IRAtom *max_xx_gt_min_yy = assignNew('V', mce, ty, binop(opGT, max_xx, min_yy)); + // For each vector, if the value in the first operand is greater than the one + // in in the second operand, all bits are set to one. Otherwise, zero. + // + // If former is 1s then xx is definitely greater than yy. That's a defined + // value. + // + // If the latter is true then there could be a value of xx greater than yy, + // so it's undefined. And the inverse of that is that there cannot be a + // value of xx greater than yy, so the result is definitely false. That's a + // defined value, too. + // + // So the result is defined if: + // + // min_xx_gt_max_yy | ~max_xx_gt_min_yy + // + // Because defined in vbits is 0s and not1s, we need to invert that: + // + // ~(min_xx_gt_max_yy | ~max_xx_gt_min_yy) + // + // We can use DeMorgan's Law to simplify the above: + // + // ~min_xx_gt_max_yy & max_xx_gt_min_yy + IRAtom *not_min_xx_gt_max_yy = assignNew('V', mce, ty, unop(opNOT, min_xx_gt_max_yy)); + return assignNew('V', mce, ty, binop(opAND, not_min_xx_gt_max_yy, max_xx_gt_min_yy)); +} /* --------- Semi-accurate interpretation of CmpORD. --------- */ @@ -3947,9 +4056,13 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_PwExtUSMulQAdd8x16: return binary16Ix8(mce, vatom1, vatom2); - case Iop_Sub32x4: case Iop_CmpGT32Sx4: + return expensiveCmpGT(mce, 32, True /* signed */, + 4, vatom1, vatom2, atom1, atom2); case Iop_CmpGT32Ux4: + return expensiveCmpGT(mce, 32, False /* unsigned */, + 4, vatom1, vatom2, atom1, atom2); + case Iop_Sub32x4: case Iop_CmpEQ32x4: case Iop_QAdd32Sx4: case Iop_QAdd32Ux4: diff --git a/memcheck/tests/x86/Makefile.am b/memcheck/tests/x86/Makefile.am index 557de6b11..f5c04ee90 100644 --- a/memcheck/tests/x86/Makefile.am +++ b/memcheck/tests/x86/Makefile.am @@ -13,6 +13,7 @@ EXTRA_DIST = \ $(addsuffix .stderr.exp,$(INSN_TESTS)) \ $(addsuffix .stdout.exp,$(INSN_TESTS)) \ $(addsuffix .vgtest,$(INSN_TESTS)) \ + pcmpgtd.stderr.exp pcmpgtd.vgtest \ pushfpopf.stderr.exp pushfpopf.stdout.exp pushfpopf.vgtest \ pushfw_x86.vgtest pushfw_x86.stdout.exp pushfw_x86.stderr.exp \ pushpopmem.stderr.exp pushpopmem.stdout.exp pushpopmem.vgtest \ @@ -37,6 +38,7 @@ check_PROGRAMS = \ fprem \ fxsave \ more_x86_fp \ + pcmpgtd \ pushfpopf \ pushfw_x86 \ pushpopmem \ @@ -52,6 +54,11 @@ AM_CCASFLAGS += @FLAG_M32@ # fpeflags must use these flags -- bug only occurred with them. fpeflags_CFLAGS = $(AM_CFLAGS) -march=i686 + +# pcmpgtd failure only occurs with clang version >= 10 and -O2 +pcmpgtd_SOURCES = pcmpgtd.c +pcmpgtd_CFLAGS = -O2 + pushfpopf_SOURCES = pushfpopf_c.c pushfpopf_s.S if VGCONF_OS_IS_DARWIN pushpopmem_CFLAGS = $(AM_CFLAGS) -mdynamic-no-pic diff --git a/memcheck/tests/x86/pcmpgtd b/memcheck/tests/x86/pcmpgtd new file mode 100755 index 0000000000000000000000000000000000000000..87c0185f0909c5a1c8e6db6ece1dff78dae491a1 GIT binary patch literal 19688 zcmeHPe{dYteSdp<ds<8K-AVEf*(UhF25eKGBwH9^8~aX{>=g(j`~xt?UQYKzcldtg zZqHaGLrrDVTpLr9W~OdB0j8Z{+?oE!KdA`~VAn1-B%PYz2?RpN5JOE8u+sub3E}$r zzJ2d>x0WZ&q|@n4?`igZe}CWiefRC|d%O4a`-8*lU5dhFaj`Xws1bjNv+SC&MFX4^ zygt^#LTmxMnt8x+;|OsHK^v1W>9Pe9>OxNiB)h{xe%KWPMRbS&yMot~P)10Q>?TV{ zpf-F`2qk2GTTFHup@L?je-ge)0ij4J^{VX!xR|j($VLRbPXs1mtsUit1#NY<CS)T5 z_KALn1eA6}lAT}J`GvlOXGMDw%6w9{={O<$op6Of5gj7H26#OQNtepVJ64Qve}E{0 zp*pylQ;bO{?e2gb@1KzWHXzf!NwjylyfB#$0Y!INS2{JiqPHs@TbfQ~OM8~?>07aM zMQ^Z}3ohgRCjX?nVdGXF3Y|{riejFDgLovr<J~VlkUsfr)9fdwC$IbS_QziD82%c` zXnc?^;j&$zyhvZfmxmC2F3=7wI-I2ijH&onT!z0A`~Z$i3i1=BbgW=StB{UoSur&h ziCU>#mYF6A&7u`4SY{@Y%CZf^L;dT_Wx-{^8z3Fa<g!9)l3y49vs+NmTk5Yof2Fyp zX`~^C^~JRzu1I5n@+Ql#YWQ-@kgPKl;wnw3$*(Md8otc6lr>-@1C0zcGSJ9CBLj^L zG&0c0KqCW<4E)bzz?k^CZv3(KeiviL{-c(wa@v?Up&ze0uiWr9M3rmKq4v!W0V6s| z^7qbGP_B8MXqw8ucN%2R`$OfIcN*oljfry?N45-|JbJn##Eg^2@*JEDwZX^C-LU!4 zH-CV!<B6be{yyFur=RY_LcqA0YOCXE;!{}Do+n1-Y~tYYM6E%HJ;(JQ{(ysHpQ%Rq zqH*l}T0?okc=<D{t?HpkcyQY9y}1Xmu+r?UM){rd51|JyX!F5Qb{rS+)wlPvN9*aW zn~jHmewe~Muw|kDc^|NzkI(<`vr6UJR^SuAQhfV9Ms|AMs8jeh6u9hl$c(2Im+IC$ zO}bHbFHD>qSg229M)}{*KLM>51{Nxm1K+;CCDrnAgWK&V;=oAdJAmvTsWK)$QGJix z%avX>Cf@qgI57UHQU1(0_JPZoctI(DH1UpN?63ItJx|)@3&!GKK1YTL!<T&_;k)o6 zonPOPD|!>Y>?;Z1eLqO}BCq?7+}rD_VSstzT*8<5Ny1n7NoT@0`s0M}%xhpC1@oW4 ze4Ci3!F-ua-7q~4Uyw$Q62k+=_rQ1&3>@6oS(u$AvlB3b_Jud#>qlU|MNDX)c?liR zjfs;k-~R15ML+Y_2X5?t-@4Uz<jXwLS4HR)DID<@-j;!#A!oiLz0}7aPAVo|mkQ|k z8Fb^!v4k&wY9o+SNY6vxGj4jxnrTeDs2INIX0#g0E5^hz#klEI>4Nc{fBC*$jPh&7 zFW)kb{Zv(caKQbw#mCEUmd_gxpP<pPeFyqQ<2`)WQ2EvHokQi1!&}4U--NdeAGl`E z62`U~2bL1tJiPcFx+c#@Kdn@ZW4}?Yc|C9V=Ck4QC&T5B2g(=1l^H)aCXS==H%mXI zYjyjM@V((3;hkX<BbL#X%dhFWs~F5iX=I?0fkp-z8E9mnk%2}A8X0J0ppk+9_Zd*| zEl;jK@nMfC^VC)Jt(Sj;zF4Uo0v-aq8!!(@OZDzwR4PXS@B7b6<s#rwz&=_@qR@9% zD$0YKn6k&O%xi7Z4=PQ5`hG~?G|yhBRIVoll&N*?Z;hmT4!XiETL`)vM;<(a%<Et8 z?Yza;@_@dVt)07S$?}C)6Z`jYP^?MBy2J$}?DdDeog2JIl?n9`mwKhk+sRqt!#S2@ z>?@@IAC<}uK-TZ||A}kR+xe(E=nWij4|*4URWrQZk9Z7k-$c^}Z|Ht+U)b9n_Acu8 z2Kv37(C_!^{QKx>nD2!j88f+Sltu;`8E9mnk%2}A8X0J0ppk(_1{xV?WZ?fw2D<S* zmKOo#+C@V8h{=oiz4UZJp26o_Dsy;^YE6h^^8_uHS8T4Ng`C!PRHWQ(lT24b9Q)|A zN{*0jLG#~igFYo_`J3YrK{s<N#)br>-_ug*v;mtbpaj+5sN#Dv6<Q)v;cH4jxhAFG zr&5`11D1D(I41caLCbzfds;(Nk?rFPF7Ho3=;P}prxja`VVTP5b|Dw+|1}_gSMF?Z zg*e~q1$4?ctXsD-uxKDYnu=rteZl3y?xiah+X#!MBiXUQ?s%byUmpj0x`X&z+WSyu zPtQ%)_XOBRoOAoM>pTmsY`*Zm7?6G-K!q0Xu0H~zTpUEL@@kl{$LSopgOF=(?GE6c zJ5e=}?jhGaeH-BYI9%7VX}W8^o&o+hL|@JHKLn&Nq^^Y+f$i{+f-hH4*KY<+Ex3BV zqOS*ji1@t+S@RJv_ux<r?YjUiJPxLvs^aR8fT{x&llE_@n%_gE!-e|g*1hUN{p%ok z?AJ2=n-X8-)}NC25?%j0iFdc^-v+)Ic~QUQpO#mb>-4eyJh{7p>93I<$yT#gUG3AU zy<d}TCDY#r?m-mlDmT+V1(7Ai0oLkQ*Xk;89z>Yw%@Xf->ob9OlF>Sy>1#mz6%KWn zj5bVqAFA<wOO4U3FzScFYh-jMYYnKkPa54(W3<Vw-wGWb!De0GN;tK;#jk%+;#)hJ z{ve2Js6Q!kwoTs$nlh_yXZlxx-$nQiU4I<-ZKRj<PxGrg^`}7ZAUaNwNAzz=yC~C- zNIZsU>1P8w$;lWJr2i|ZuajAx%+fmbV}ztxrk@4Q<ITbBT@cTb;y5W<`UjwST)Ub6 zUsCY_U3WpJjU4Qu+vaq#_kIZ870h>n8f<DN);l=dhnh+8LqZR_sn1RB<^|v^hdb>` ze<y&hb?WnlcWo}yX;e_F8t$=e%@pVuIPPz_cLHd-`w#&a_j`f_i<>RzEQ4>4-n!SL z>2rbZ2g`$-*!0JM@ZkK+9iaAg?VDh=YjiPn)Lw<kU7NU^VxQr9%zqbA&7=m3Z6~Td zhzpwEYH6QOQ%9J{d!VtK$eHUw&U#8**nff8W-fp%L$X;UW3!i_x<@E*;!A>H(^o=) z2inZsQJ8oX55?Y0RSvWsWJ;TIjkiNnz5dzrylqOmhc0SG^{#AL)pFgOp^nva*5Wy( zYHX+jd0EI*@0yktJSj9E=XkE=ps52UEiGgOuhXG5)Imfa$Z4|`DQTVsZQD2Q2Q!uH z;JUS=DxR(=)Qa~iXx#`)A2rv~vIeMsc1;6qK6JT#CL{BXAbQa533dKi{~X)%PzOak zqnEilB?DOlIeU_v)xxciz49L@tf!+5iDl2znP(bmMNxk3olb!G#ONv&3tg#fG+m0t zSuN3p-8ANkUa_@iEL-XtO<Bdyq0(iI=i@>VdrZ30>D`&7J>9ffr;9%Yy6n4uwN=`c zl8vN;NeZaCLj@#GB*;`YkuxojIJexjEip~Vc1O}u#OY$(l5{?r$&XpFU=$t2_Lo>J zo`4|@EO!%|=8}VXuGZB2nnx`?qb*Qd?$(-CtEGfT^;+5j&oe|75}NK&+aK2guWJ5n zns$%Y`k1Dz(tNI+9a^i$wX9w9-J-SXHgGMMq?*!KYd&?h9@JW0*9z`4n&uhusQ2p$ z&F4{P!_ITJcC}}fHrI2Drma@*4-&ugaSeCvBwQ*UbxzRLiu3hb9-ot)?Mmh{@ve9= zmyC4f3#r`^E8c}HjTKVaSXU;Vi6-OGU0qhZXcg%UW2a9w<7P6FjiuuSR*Y0pE*5;# zB1u&p=Z1)Ej*xhvU?b5S#*t~}Be8A<aUvB@$BK+-aKoE6g}0fThwd3Pw`?03G)-pi zN~P1}AfF-y)YMAKEO8PwNYhGXD5?~Y7=aw75rq=%W}*PlJ76>tQdzPkEN>O;o)G~L z1sqU~Cez#jc0F08BH5S7H%sR*Hz8}&jHYwZU3LsbE+e}{q?G1iMPe}?2thMl9A(91 zj(m^iN?GbA=Z|L5m2~a_#B7%Gc>?)e@o_jWp^fFsC?Gh^=j@)3BI^`N0#;xb$KfX$ zNvF9pu*Qo-!FM5=gr2w_=u~7P#a-}93E({_Wg*VSW1=hMjIIY#O{61Z=y1d=;LPU= zh>7p{!e%g*k62c`kY&+iq`>mDODu(E9*n~<nP(BYd{Df&7^x_1#6~9Tgg`ISJq5+Z zx}W$+;7ZkOlP#RAKcCs=NrTUYsfh5fD|Csjim*;h$yeFADfyT(sZ;sUs_T&2MpS$q zlk1Oqep7XwQqR}fRP%e4O=EIhP_N(2<ocqX-%?$N)blajm|~u;vgs@^CEr`!-&^m; z$K<-Mp6_SXeXlhFm9<s(i`KAl^KNG&flw`r<0`|0QLF=Ng(_BE`jq^cY^wE%%4RXS z@37tvR%@Y9EsNtSR%45%<j-OByJ2ThtE<*}iOS^a*2%x3o*^ieec>|vD=)*B3!zCd zU0ZJUi^?R&#%O}+VqSL8aelpmf7HRhO7K}*2-sw~5vYPyyv&2Rmf3pFzx2G&YKq#e zzBdrA#<~8b=l2l!Q^kL|e*Go))6VX_Dg>;)Oy%cWT&3fZ@pBA(1yfSz_<2d_JICRh zoPX(fzR3MtI<7qEKs)k4ziy`@^Ogb<m}0!m;`&^*u38GdANn%?Qs^gr*QM8E?`8ZL zm*LYAf&4qizXg6kHdX^Cz;APPvTnz9agg}*l{*mP+BCVM@&x#l2l{zG75)t!@Vhwt z7*`>OpT8%4mzV7o;}aw=8R19%-+}7||0MY2$C;m3gua~H(dz;#w1%Vjo%(-G`j}oZ z$92J?1*>S4u&W@-V(~(JELFr4(zG%(JIG>gYnri~IhM|iVs;V3<Fsf-N_$u|m%-!H zipPRCE?-7_sHade6RB(pkMcrf+>B?f!Z=G5BAK`uD`he?L#UxJok>}Bs*|%k(_FtP zeA}QoxN(3cQT3{rM+|J+7`|<2UA=_w^#%*u-OWKmcrXSwF>}N4?fv0l^Y-=YHxF(x zw}ktL2gxhnAs#K3c+fQUu=kFKLUmik<FSYp5gW`MBIh1+r<g45P3CB=ZpS&2SImi7 zC^Ls{hgK|=HA}@fO%tnuQHq!uOJiDtc^l@s<jqP&F_ENM=xDJh-1FIvX>Pe~oyZvr z7RNJIWE9XU*f1$E%+lh8JPT%XRy-K)A6jZf#sr0RP;j)A!oM8AR5VCbGEz*kU~D`K ze>TM2lS{<Qt~$bm46#PYKwx=HErYxxK`XuoI1LfVay+lWcv1|CB&MEK%C@jahixHo zIF4jeQM8(aFH}L^fb9$kVL^=348|UUjgOYbFm@6-iAP393-R5O!hcnWOB}h8C^Btd zSHEjbPflEdcy`!D&fDbw|8PD*Nw>Bv0736wrM;Zr$^Ul|s8t3o?@yB51Ekho&I8X0 zhYO{mfKGb@d<}<eU(OeG)izip?C|P=7hOQ<X8`oac-JfKyVVfa=oa>}|4w`1hHV6b zR|wVi<@}Rg15u%OlvJpXlsnmeCoxgzbq%S?_D_lSPYL@WQmQG={O^Q}-d#(3IiGD8 z9c&i1GJdHi;V#JOUAUy>yf+~11H#@JzhJWmfsm@Sm-FM0u<sU%vVCX&_X+!cp(o$x z=clm8)BckDJqjM>MEb8jNAO6bLN>KUxUI6vz-sLS;yDx$_A-8HAlUT8<%M1&*V@bH z<cLssSe!?pUbU0-HyrkIo__{YHY(F>P_3QuKM4u7FY_<w0W1I}3e7ufi_`uvgthjQ z|7XUwT3u@l+-d(04tu%27zx;hG+(MM^88EQQ6Lm!HBD<mpiqJ~aOz81!tX<k-q=Zd zxn9BBM_#0!O-+s;$){KMG<KxDoR`Y~^K(|jPvf78w3FxmE$~Q1+ROFN#V*pOIZ}O* z_7c7eajkuIoz!jHEvh%KwlD23IPB$oa*?pVT>H}PM?lDzj9<>r`h>k~N9v1oNRoX5 zjCex5%9r-5svH}U_7c(;5Q*2S7KOu2jsfS?^<BpPpL=a@Ee-<*$-ZzI`_nhr_QxFt z4w8QoBA&DVe@Y8Dl=13?Y17hPLRvRS+}5z`)|EDVtd<GfsY|q@32yPEu)j~-7_S1C op0)CP%I6Q=U*x&xhO!FV$T>%Y4iamRsqN2+8~R#@frDiK4ffTmCjbBd literal 0 HcmV?d00001 diff --git a/memcheck/tests/x86/pcmpgtd.c b/memcheck/tests/x86/pcmpgtd.c new file mode 100644 index 000000000..f9580dd6b --- /dev/null +++ b/memcheck/tests/x86/pcmpgtd.c @@ -0,0 +1,25 @@ +#include <signal.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + + +int main() +{ + struct sigaction act; + if (sigaction(SIGTERM, 0, &act) == 1) { + return 12; + } + if (sigaction(SIGTERM, 0, &act) == 1) { + return 12; + } + + char pattern[] = "\x1\x2\x3\x4\x5\x6\x7\x8\x9"; + const unsigned long plen = strlen(pattern); + pattern[1] = 0; + size_t hp=0; + for (size_t i = 0; i < plen; ++i) + hp += pattern[i]; + return hp % 10; +} diff --git a/memcheck/tests/x86/pcmpgtd.stderr.exp b/memcheck/tests/x86/pcmpgtd.stderr.exp new file mode 100644 index 000000000..eb42921c6 --- /dev/null +++ b/memcheck/tests/x86/pcmpgtd.stderr.exp @@ -0,0 +1,10 @@ + + +HEAP SUMMARY: + in use at exit: 0 bytes in 0 blocks + total heap usage: 0 allocs, 0 frees, 0 bytes allocated + +For a detailed leak analysis, rerun with: --leak-check=full + +For lists of detected and suppressed errors, rerun with: -s +ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) diff --git a/memcheck/tests/x86/pcmpgtd.vgtest b/memcheck/tests/x86/pcmpgtd.vgtest new file mode 100644 index 000000000..3b8416530 --- /dev/null +++ b/memcheck/tests/x86/pcmpgtd.vgtest @@ -0,0 +1,2 @@ +prog: pcmpgtd +prereq: test -e pcmpgtd -- 2.20.1 |
|
From: Eyal S. <eya...@gm...> - 2021-03-04 22:08:20
|
From: eyal0 <109...@us...> This fixes https://bugs.kde.org/show_bug.cgi?id=432801 To test: ```sh make && perl tests/vg_regtest memcheck/tests/x86/pcmpgtd ``` --- .gitignore | 1 + memcheck/mc_translate.c | 101 +++++++++++++++++- memcheck/tests/amd64/Makefile.am | 6 +- memcheck/tests/amd64/pcmpgtd.c | 134 ++++++++++++++++++++++++ memcheck/tests/amd64/pcmpgtd.stderr.exp | 44 ++++++++ memcheck/tests/amd64/pcmpgtd.vgtest | 2 + 6 files changed, 285 insertions(+), 3 deletions(-) create mode 100644 memcheck/tests/amd64/pcmpgtd.c create mode 100644 memcheck/tests/amd64/pcmpgtd.stderr.exp create mode 100644 memcheck/tests/amd64/pcmpgtd.vgtest diff --git a/.gitignore b/.gitignore index b9fca3de3..fd1cf9ae5 100644 --- a/.gitignore +++ b/.gitignore @@ -974,6 +974,7 @@ /memcheck/tests/amd64/insn-bsfl /memcheck/tests/amd64/insn-pmovmskb /memcheck/tests/amd64/insn-pcmpistri +/memcheck/tests/amd64/pcmpgtd /memcheck/tests/amd64/sh-mem-vec128 /memcheck/tests/amd64/sh-mem-vec256 /memcheck/tests/amd64/xsave-avx diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c index 516988bdd..51e94427e 100644 --- a/memcheck/mc_translate.c +++ b/memcheck/mc_translate.c @@ -1287,6 +1287,101 @@ static IRAtom* expensiveCmpEQorNE ( MCEnv* mce, return final_cast; } +/* Check if we can know, despite the uncertain bits, that xx is greater than yy. + Notice that it's xx > yy and not the other way around. This is Intel syntax + with destination first. It will appear reversed in gdb disassembly (AT&T + syntax). + */ +static IRAtom* expensiveCmpGT ( MCEnv* mce, + unsigned int word_size, + Bool is_signed, + unsigned int count, + IRAtom* vxx, IRAtom* vyy, + IRAtom* xx, IRAtom* yy ) +{ + IROp opAND, opOR, opXOR, opNOT, opEQ, opSHL, opGT; + IRType ty; + + tl_assert(isShadowAtom(mce,vxx)); + tl_assert(isShadowAtom(mce,vyy)); + tl_assert(isOriginalAtom(mce,xx)); + tl_assert(isOriginalAtom(mce,yy)); + tl_assert(sameKindedAtoms(vxx,xx)); + tl_assert(sameKindedAtoms(vyy,yy)); + + switch (word_size * count) { + case 128: + ty = Ity_V128; + opAND = Iop_AndV128; + opOR = Iop_OrV128; + opXOR = Iop_XorV128; + opNOT = Iop_NotV128; + break; + default: + VG_(tool_panic)("expensiveCmpGT"); + } + if (word_size == 32 && count == 4) { + opEQ = Iop_CmpEQ32x4; + opSHL = Iop_ShlN32x4; + if (is_signed) { + opGT = Iop_CmpGT32Sx4; + } else { + opGT = Iop_CmpGT32Ux4; + } + } else { + VG_(tool_panic)("expensiveCmpGT"); + } + IRAtom *MSBs; + if (is_signed) { + // For unsigned it's easy to make the min and max: Just set the unknown + // bits all to 0s or 1s. For signed it's harder because having a 1 in the + // MSB makes a number smaller, not larger! We can work around this by + // flipping the MSB before and after computing the min and max values. + IRAtom *const0 = mkV128(0); + IRAtom *all_ones = assignNew('V', mce, ty, binop(opEQ, const0, const0)); + MSBs = assignNew('V', mce, ty, binop(opSHL, all_ones, mkU8(31))); + xx = assignNew('V', mce, ty, binop(opXOR, xx, MSBs)); + yy = assignNew('V', mce, ty, binop(opXOR, yy, MSBs)); + // From here on out, we're dealing with MSB-flipped integers. + } + // We can combine xx and vxx to create two values: the largest that xx could + // possibly be and the smallest that xx could possibly be. Likewise, we can + // do the same for yy. We'll call those max_xx and min_xx and max_yy and + // min_yy. + IRAtom *not_vxx = assignNew('V', mce, ty, unop(opNOT, vxx)); + IRAtom *not_vyy = assignNew('V', mce, ty, unop(opNOT, vyy)); + IRAtom *max_xx = assignNew('V', mce, ty, binop(opOR, xx, vxx)); + IRAtom *min_xx = assignNew('V', mce, ty, binop(opAND, xx, not_vxx)); + IRAtom *max_yy = assignNew('V', mce, ty, binop(opOR, yy, vyy)); + IRAtom *min_yy = assignNew('V', mce, ty, binop(opAND, yy, not_vyy)); + if (is_signed) { + // Unflip the MSBs. + max_xx = assignNew('V', mce, ty, binop(opXOR, max_xx, MSBs)); + min_xx = assignNew('V', mce, ty, binop(opXOR, min_xx, MSBs)); + max_yy = assignNew('V', mce, ty, binop(opXOR, max_yy, MSBs)); + min_yy = assignNew('V', mce, ty, binop(opXOR, min_yy, MSBs)); + } + IRAtom *min_xx_gt_max_yy = assignNew('V', mce, ty, binop(opGT, min_xx, max_yy)); + IRAtom *max_xx_gt_min_yy = assignNew('V', mce, ty, binop(opGT, max_xx, min_yy)); + // If min_xx is greater than max_yy then xx is surely greater than yy so we know + // our answer for sure. If max_xx is not greater than min_yy then xx can't + // possible be greater than yy so again we know the answer for sure. For all + // other cases, we can't know. + // + // So the result is defined if: + // + // min_xx_gt_max_yy | ~max_xx_gt_min_yy + // + // Because defined in vbits is 0s and not 1s, we need to invert that: + // + // ~(min_xx_gt_max_yy | ~max_xx_gt_min_yy) + // + // We can use DeMorgan's Law to simplify the above: + // + // ~min_xx_gt_max_yy & max_xx_gt_min_yy + IRAtom *not_min_xx_gt_max_yy = assignNew('V', mce, ty, unop(opNOT, min_xx_gt_max_yy)); + return assignNew('V', mce, ty, binop(opAND, not_min_xx_gt_max_yy, max_xx_gt_min_yy)); +} /* --------- Semi-accurate interpretation of CmpORD. --------- */ @@ -3947,9 +4042,13 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_PwExtUSMulQAdd8x16: return binary16Ix8(mce, vatom1, vatom2); - case Iop_Sub32x4: case Iop_CmpGT32Sx4: + return expensiveCmpGT(mce, 32, True /* signed */, + 4, vatom1, vatom2, atom1, atom2); case Iop_CmpGT32Ux4: + return expensiveCmpGT(mce, 32, False /* unsigned */, + 4, vatom1, vatom2, atom1, atom2); + case Iop_Sub32x4: case Iop_CmpEQ32x4: case Iop_QAdd32Sx4: case Iop_QAdd32Ux4: diff --git a/memcheck/tests/amd64/Makefile.am b/memcheck/tests/amd64/Makefile.am index da15cf797..e81ea74da 100644 --- a/memcheck/tests/amd64/Makefile.am +++ b/memcheck/tests/amd64/Makefile.am @@ -18,6 +18,7 @@ EXTRA_DIST = \ insn-pcmpistri.vgtest insn-pcmpistri.stdout.exp insn-pcmpistri.stderr.exp \ insn-pmovmskb.vgtest insn-pmovmskb.stdout.exp insn-pmovmskb.stderr.exp \ more_x87_fp.stderr.exp more_x87_fp.stdout.exp more_x87_fp.vgtest \ + pcmpgtd.stderr.exp pcmpgtd.vgtest \ sh-mem-vec128-plo-no.vgtest \ sh-mem-vec128-plo-no.stderr.exp \ sh-mem-vec128-plo-no.stdout.exp \ @@ -43,6 +44,7 @@ check_PROGRAMS = \ fxsave-amd64 \ insn-bsfl \ insn-pmovmskb \ + pcmpgtd \ sh-mem-vec128 \ sse_memory \ xor-undef-amd64 @@ -55,8 +57,8 @@ endif # clang 3.5.0 barfs about -mfancy-math-387 if !COMPILER_IS_CLANG check_PROGRAMS += \ - more_x87_fp \ - shr_edx + more_x87_fp \ + shr_edx endif AM_CFLAGS += @FLAG_M64@ diff --git a/memcheck/tests/amd64/pcmpgtd.c b/memcheck/tests/amd64/pcmpgtd.c new file mode 100644 index 000000000..891ebad35 --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.c @@ -0,0 +1,134 @@ +/* https://bugs.kde.org/show_bug.cgi?id=432801 */ + +#include <signal.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <ctype.h> + +#include "../../memcheck.h" + +// This function fails when compiled on clang version 10 or greater with -O2. +// It's unused by the test but left here as a copy of the error in the bug +// report https://bugs.kde.org/show_bug.cgi?id=432801 +void standalone() { + struct sigaction act; + if (sigaction(SIGTERM, 0, &act) == 1) { + return; + } + if (sigaction(SIGTERM, 0, &act) == 1) { + return; + } + + char pattern[] = "\x1\x2\x3\x4\x5\x6\x7\x8\x9"; + const unsigned long plen = strlen(pattern); + pattern[1] = 0; + size_t hp=0; + for (size_t i = 0; i < plen; ++i) + hp += pattern[i]; + volatile int j = 0; + if (j == hp % 10) { + j++; + } + printf("%ld\n", hp); +} + +typedef unsigned long ULong; + +typedef struct { + ULong w64[2]; /* Note: little-endian */ +} V128; + +static int cmpGT32Sx4(V128 x, V128 y) +{ + int result; + __asm__("movups %1,%%xmm6\n" + "\tmovups %2,%%xmm7\n" + // order swapped for AT&T style which has destination second. + "\tpcmpgtd %%xmm7,%%xmm6\n" + "\tmovd %%xmm6, %0" + : "=r" (result) : "m" (x), "m" (y) : "xmm6"); + return result; +} + +/* Set the V bits on the data at "addr". Note the convention: A zero + bit means "defined"; 1 means "undefined". */ +static void set_vbits(V128 *addr, V128 vbits) +{ + int i; + for (i=0 ; i<2 ; ++i) { + (void)VALGRIND_SET_VBITS(&addr->w64[i], &vbits.w64[i], sizeof(vbits.w64[i])); + } +} + +/* Use a value that we know is invalid. */ +static void use(char *x, char* y, int invalid) +{ + /* Convince GCC it does not know what is in "invalid" so it cannot + possibly optimize away the conditional branch below. */ + __asm__ ("" : "=r" (invalid) : "0" (invalid)); + + /* Create a conditional branch on which our output depends, so that + memcheck cannot possibly optimize it away, either. */ + if (invalid) { + fprintf(stderr, "%s > %s == true\n", x, y); + } else { + fprintf(stderr, "%s > %s == false\n", x, y); + } +} + +// Convert a string like "123XXX45" to a value and vbits. +V128 string_to_v128(char *s) { + ULong x = 0; + ULong vx = 0; + + for (; *s; s++) { + int lowered_c = tolower(*s); + x <<= 4; + vx <<= 4; + if (lowered_c == 'x') { + vx |= 0xf; + } else if (isdigit(lowered_c)) { + x |= lowered_c - '0'; + } else if (lowered_c >= 'a' && lowered_c <= 'f') { + x |= lowered_c - 'a' + 0xa; + } else { + fprintf(stderr, "Not a hex digit: %c\n", *s); + exit(1); + } + } + + V128 vx128 = { { vx, 0 } }; + V128 x128 = { { x, 0 } }; + set_vbits(&x128, vx128); + return x128; +} + +static void doit(char *x, char *y) { + int result = cmpGT32Sx4(string_to_v128(x), string_to_v128(y)); + use(x, y, result); +} + +int main() { + // completely undefined + doit("xxxxxxxx", "xxxxxxxx"); + + // completely defined + doit("00000000", "00000000"); + doit("00000000", "f0000000"); + doit("f0000000", "00000000"); + + doit("00000000", "fxxxxxxx"); // defined: 0 > all negatives + doit("0xxxxxxx", "fxxxxxxx"); // defined: non-negatives > all negatives + doit("xxxxxxx0", "f0000000"); // undefined + doit("xxxxxxx1", "80000000"); // defined: ends with 1 > MIN_INT + doit("5xxxxxxx", "6xxxxxxx"); // defined + doit("8xxxxxxx", "9xxxxxxx"); // defined + + doit("1234567x", "12345678"); // undefined + doit("1234567x", "1234567f"); // defined: x can't be more than f + doit("1234567x", "1234567e"); // undefined: x can be more than e + + return 0; +} diff --git a/memcheck/tests/amd64/pcmpgtd.stderr.exp b/memcheck/tests/amd64/pcmpgtd.stderr.exp new file mode 100644 index 000000000..bb248c60c --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.stderr.exp @@ -0,0 +1,44 @@ + +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:115) + +xxxxxxxx > xxxxxxxx == false +00000000 > 00000000 == false +00000000 > f0000000 == true +f0000000 > 00000000 == false +00000000 > fxxxxxxx == true +0xxxxxxx > fxxxxxxx == true +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:124) + +xxxxxxx0 > f0000000 == true +xxxxxxx1 > 80000000 == true +5xxxxxxx > 6xxxxxxx == false +8xxxxxxx > 9xxxxxxx == false +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:129) + +1234567x > 12345678 == false +1234567x > 1234567f == false +Conditional jump or move depends on uninitialised value(s) + at 0x........: use (pcmpgtd.c:74) + by 0x........: doit (pcmpgtd.c:110) + by 0x........: main (pcmpgtd.c:131) + +1234567x > 1234567e == false + +HEAP SUMMARY: + in use at exit: 0 bytes in 0 blocks + total heap usage: 0 allocs, 0 frees, 0 bytes allocated + +For a detailed leak analysis, rerun with: --leak-check=full + +Use --track-origins=yes to see where uninitialised values come from +For lists of detected and suppressed errors, rerun with: -s +ERROR SUMMARY: 4 errors from 4 contexts (suppressed: 0 from 0) diff --git a/memcheck/tests/amd64/pcmpgtd.vgtest b/memcheck/tests/amd64/pcmpgtd.vgtest new file mode 100644 index 000000000..08fabeb76 --- /dev/null +++ b/memcheck/tests/amd64/pcmpgtd.vgtest @@ -0,0 +1,2 @@ +prog: pcmpgtd -q +prereq: test -e pcmpgtd -- 2.20.1 |