You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
|
|
|
|
1
|
|
2
|
3
(4) |
4
(5) |
5
(5) |
6
(3) |
7
|
8
|
|
9
|
10
(8) |
11
(13) |
12
(12) |
13
(1) |
14
(1) |
15
(5) |
|
16
|
17
(12) |
18
(7) |
19
(5) |
20
|
21
(11) |
22
(8) |
|
23
(8) |
24
(6) |
25
|
26
(2) |
27
(3) |
28
(9) |
29
|
|
30
|
31
(5) |
|
|
|
|
|
|
From: <sv...@va...> - 2011-01-17 23:10:48
|
Author: sewardj
Date: 2011-01-17 23:10:39 +0000 (Mon, 17 Jan 2011)
New Revision: 11503
Log:
Add test cases for PCMPxSTRx $0x00.
Modified:
trunk/none/tests/amd64/pcmpstr64.c
trunk/none/tests/amd64/pcmpstr64.stdout.exp
Modified: trunk/none/tests/amd64/pcmpstr64.c
===================================================================
--- trunk/none/tests/amd64/pcmpstr64.c 2011-01-17 12:34:33 UTC (rev 11502)
+++ trunk/none/tests/amd64/pcmpstr64.c 2011-01-17 23:10:39 UTC (rev 11503)
@@ -202,6 +202,7 @@
even if they would probably work. Life is too short to have
unvalidated cases in the code base. */
switch (imm8) {
+ case 0x00:
case 0x02: case 0x08: case 0x0C: case 0x12: case 0x1A:
case 0x3A: case 0x44: case 0x4A:
break;
@@ -1072,9 +1073,89 @@
}
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_00 //
+// //
+//////////////////////////////////////////////////////////
+UInt h_pcmpistri_00 ( V128* argL, V128* argR )
+{
+ V128 block[2];
+ memcpy(&block[0], argL, sizeof(V128));
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t"
+ "movdqu 0(%2), %%xmm2" "\n\t"
+ "movdqu 16(%2), %%xmm11" "\n\t"
+ "pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t"
+//"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t"
+//"movd %%xmm0, %%ecx" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%rdx" "\n\t"
+ "movq %%rcx, %0" "\n\t"
+ "movq %%rdx, %1" "\n\t"
+ "addq $1024, %%rsp" "\n\t"
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
+}
+UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
+{
+ V128 resV;
+ UInt resOSZACP, resECX;
+ Bool ok
+ = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
+ zmask_from_V128(argLU),
+ zmask_from_V128(argRU),
+ 0x00, False/*!isSTRM*/
+ );
+ assert(ok);
+ resECX = resV.uInt[0];
+ return (resOSZACP << 16) | resECX;
+}
+void istri_00 ( void )
+{
+ char* wot = "00";
+ UInt(*h)(V128*,V128*) = h_pcmpistri_00;
+ UInt(*s)(V128*,V128*) = s_pcmpistri_00;
+
+ try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
+ try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
+
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
+
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
+
+ try_istri(wot,h,s, "0000000000000000", "0000000000000000");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
+
+ try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
+
+ try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
+ try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
+}
+
+
+
//////////////////////////////////////////////////////////
// //
// main //
@@ -1091,5 +1172,6 @@
istri_0C();
istri_12();
istri_44();
+ istri_00();
return 0;
}
Modified: trunk/none/tests/amd64/pcmpstr64.stdout.exp
===================================================================
--- trunk/none/tests/amd64/pcmpstr64.stdout.exp 2011-01-17 12:34:33 UTC (rev 11502)
+++ trunk/none/tests/amd64/pcmpstr64.stdout.exp 2011-01-17 23:10:39 UTC (rev 11503)
@@ -204,3 +204,25 @@
istri 44 123456789abcdef1 000000fecb975421 -> 0881000f 0881000f
istri 44 0123456789abcdef 00000000dca86532 -> 00c1000d 00c1000d
istri 44 123456789abcdef1 00000000dca86532 -> 0081000e 0081000e
+istri 00 abcdacbdabcdabcd 000000000000000a -> 00810003 00810003
+istri 00 abcdabcdabcdabcd 000000000000000b -> 00810002 00810002
+istri 00 abcdabcdabcdabcd 00000000000000ab -> 00810002 00810002
+istri 00 abcdabc0abcdabcd 000000000000abcd -> 08c10000 08c10000
+istri 00 abcdabcdabcdabcd 000000000000abcd -> 08810000 08810000
+istri 00 0bcdabcdabcdabcd 000000000000abcd -> 08c10000 08c10000
+istri 00 abcdabcdabcda0cd 000000000000abcd -> 08c10000 08c10000
+istri 00 abcdabcdabcdab0d 000000000000abcd -> 08c10000 08c10000
+istri 00 abcdabcdabcdabc0 000000000000abcd -> 00c00010 00c00010
+istri 00 abcdabcdabcdabcd 000000000000abcd -> 08810000 08810000
+istri 00 abcdabcdabcdabcd 000000000000a0cd -> 08810000 08810000
+istri 00 abcdabcdabcdabcd 000000000000ab0d -> 08810000 08810000
+istri 00 abcdabcdabcdabcd 000000000000abc0 -> 00800010 00800010
+istri 00 0000000000000000 0000000000000000 -> 00c00010 00c00010
+istri 00 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000
+istri 00 0000abcdabcdabcd 000000000000abcd -> 08c10000 08c10000
+istri 00 0000abcdabcdabcd 000000000000dcba -> 08c10000 08c10000
+istri 00 0000abcdabcdabcd 000000000000bbbb -> 00c10002 00c10002
+istri 00 0000abcdabcdabcd 000000000000baba -> 00c10002 00c10002
+istri 00 0000abcdabcdabcd 00000000000baba0 -> 00c00010 00c00010
+istri 00 0ddc0ffeebadf00d 00000000cafebabe -> 00c00010 00c00010
+istri 00 0ddc0ffeebadfeed 00000000cafebabe -> 00c10001 00c10001
|
|
From: <sv...@va...> - 2011-01-17 23:06:25
|
Author: sewardj
Date: 2011-01-17 23:06:16 +0000 (Mon, 17 Jan 2011)
New Revision: 2080
Log:
Handle PCMPxSTRx $0x00. Fixes #262995.
Modified:
trunk/priv/guest_amd64_toIR.c
trunk/priv/guest_generic_x87.c
Modified: trunk/priv/guest_amd64_toIR.c
===================================================================
--- trunk/priv/guest_amd64_toIR.c 2011-01-17 13:11:37 UTC (rev 2079)
+++ trunk/priv/guest_amd64_toIR.c 2011-01-17 23:06:16 UTC (rev 2080)
@@ -15583,6 +15583,7 @@
any cases for which the helper function has not been
verified. */
switch (imm) {
+ case 0x00:
case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12:
case 0x1A: case 0x3A: case 0x44: case 0x4A:
break;
Modified: trunk/priv/guest_generic_x87.c
===================================================================
--- trunk/priv/guest_generic_x87.c 2011-01-17 13:11:37 UTC (rev 2079)
+++ trunk/priv/guest_generic_x87.c 2011-01-17 23:06:16 UTC (rev 2080)
@@ -715,6 +715,7 @@
even if they would probably work. Life is too short to have
unvalidated cases in the code base. */
switch (imm8) {
+ case 0x00:
case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12:
case 0x1A: case 0x3A: case 0x44: case 0x4A:
break;
|
|
From: Eric P. <eri...@or...> - 2011-01-17 20:46:16
|
Le 17/01/2011 14:11, sv...@va... a écrit : > Author: sewardj > Date: 2011-01-17 13:11:37 +0000 (Mon, 17 Jan 2011) > New Revision: 2079 > > Log: > Tolerate redundant REX.W in POPQ m64. (#256669). > Hi Julian, as you're busy updating amd64 support, any chance to see the patches included in #253657 get into mainstream? A+ -- Eric Pouech "The problem with designing something completely foolproof is to underestimate the ingenuity of a complete idiot." (Douglas Adams) |
|
From: <sv...@va...> - 2011-01-17 13:11:45
|
Author: sewardj
Date: 2011-01-17 13:11:37 +0000 (Mon, 17 Jan 2011)
New Revision: 2079
Log:
Tolerate redundant REX.W in POPQ m64. (#256669).
Modified:
trunk/priv/guest_amd64_toIR.c
Modified: trunk/priv/guest_amd64_toIR.c
===================================================================
--- trunk/priv/guest_amd64_toIR.c 2011-01-17 12:32:25 UTC (rev 2078)
+++ trunk/priv/guest_amd64_toIR.c 2011-01-17 13:11:37 UTC (rev 2079)
@@ -16856,7 +16856,8 @@
/* There is no encoding for 32-bit pop in 64-bit mode.
So sz==4 actually means sz==8. */
if (haveF2orF3(pfx)) goto decode_failure;
- vassert(sz == 2 || sz == 4);
+ vassert(sz == 2 || sz == 4
+ || /* tolerate redundant REX.W, see #210481 */ sz == 8);
if (sz == 4) sz = 8;
if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
|
|
From: <sv...@va...> - 2011-01-17 12:34:42
|
Author: sewardj Date: 2011-01-17 12:34:33 +0000 (Mon, 17 Jan 2011) New Revision: 11502 Log: Add tests for AAD and AAM (base 10 only). (Vince Weaver, vi...@cs...) Added: trunk/none/tests/x86/aad_aam.c trunk/none/tests/x86/aad_aam.stderr.exp trunk/none/tests/x86/aad_aam.stdout.exp trunk/none/tests/x86/aad_aam.vgtest Modified: trunk/none/tests/x86/Makefile.am Modified: trunk/none/tests/x86/Makefile.am =================================================================== --- trunk/none/tests/x86/Makefile.am 2011-01-17 11:42:19 UTC (rev 11501) +++ trunk/none/tests/x86/Makefile.am 2011-01-17 12:34:33 UTC (rev 11502) @@ -21,6 +21,7 @@ ## FIXME: move lzcnt32 to SSE4 conditionalisation, when that happens. EXTRA_DIST = \ + aad_aam.stdout.exp aad_aam.stderr.exp aad_aam.vgtest \ badseg.stderr.exp badseg.stdout.exp badseg.vgtest \ bt_everything.stderr.exp bt_everything.stdout.exp bt_everything.vgtest \ bt_literal.stderr.exp bt_literal.stdout.exp bt_literal.vgtest \ @@ -61,6 +62,7 @@ xadd.stdout.exp xadd.stderr.exp xadd.vgtest check_PROGRAMS = \ + aad_aam \ badseg \ bt_everything \ bt_literal \ Added: trunk/none/tests/x86/aad_aam.c =================================================================== --- trunk/none/tests/x86/aad_aam.c (rev 0) +++ trunk/none/tests/x86/aad_aam.c 2011-01-17 12:34:33 UTC (rev 11502) @@ -0,0 +1,114 @@ +/* This tests the somewhat obscure 32-bit Intel aam and aad instructions */ +/* by Vince Weaver (vince _at_ deater.net ) */ + +#include <stdio.h> + +int parity(int v) { + + int i; + int p = 1; + + for (i = 0; i < 8; i++) + p ^= (1 & (v >> i)); + return p; +} + +int main(int argc, char **argv) { + + printf("test begins\n"); + unsigned short i,out; + unsigned int flags; + int cf,pf,af,zf,sf,of; + + /* test AAM */ + + for(i=0;i<65535;i++) { + // printf("%d, %d, %d\n",i,(i&0xff)/10,(i&0xff)%10); + out=i; + __asm__ __volatile__ ("mov %2 ,%%ax\n" + "aam\n" + "pushf\n" + "mov %%ax, %0\n" + "pop %%eax\n" + "mov %%eax, %1\n" + :"=r"(out), "=r"(flags) /* outputs */ 
+ :"r"(out) /* input */ + :"%eax" /* clobbered */ + ); + cf=!!(flags&0x1); + pf=!!(flags&0x4); + af=!!(flags&0x10); + zf=!!(flags&0x40); + sf=!!(flags&0x80); + of=!!(flags&0x800); + + // printf("%d, %d, %d, ",i,(out>>8)&0xff,out&0xff); + // printf("%x CF=%d PF=%d AF=%d ZF=%d SF=%d OF=%d\n", + // flags,cf,pf,af,zf,sf,of); + + if (zf && ((out&0xff)!=0)) { + printf("Error with aam (zf)!\n"); + } + if (pf != parity(out&0xff)) { + printf("Error with aam (pf)!\n"); + } + if (sf != !!(out&0x80)) { + printf("Error with aam (sf)!\n"); + } + + + if ( ((out>>8)&0xff) != ((i&0xff)/10)) { + printf("Error with aam!\n"); + } + if ( (out&0xff) != ((i&0xff)%10)) { + printf("Error with aam!\n"); + } + + } + + /* test AAD */ + + for(i=0;i<65535;i++) { + // printf("%x, %d\n",i, ((((i>>8)&0xff)*10)+(i&0xff))&0xff ); + out=i; + __asm__ __volatile__ ("mov %2 ,%%ax\n" + "aad\n" + "pushf\n" + "mov %%ax, %0\n" + "pop %%eax\n" + "mov %%eax, %1\n" + :"=r"(out), "=r"(flags) /* outputs */ + :"r"(out) /* input */ + :"%eax" /* clobbered */ +); + + cf=!!(flags&0x1); + pf=!!(flags&0x4); + af=!!(flags&0x10); + zf=!!(flags&0x40); + sf=!!(flags&0x80); + of=!!(flags&0x800); + + // printf("%x, %d ",i,out); + // printf("%x CF=%d PF=%d AF=%d ZF=%d SF=%d OF=%d\n", + // flags,cf,pf,af,zf,sf,of); + + if (zf && ((out&0xff)!=0)) { + printf("Error with aad (zf)!\n"); + } + if (pf != parity(out&0xff)) { + printf("Error with aad (pf)!\n"); + } + if (sf != !!(out&0x80)) { + printf("Error with aad (sf) %d %d!\n",sf,!!(out&0x80)); + } + + if ( out != ( ((((i>>8)&0xff)*10)+(i&0xff))&0xff) ) { + printf("Error with aad!\n"); + } + } + + printf("test completed\n"); + return 0; + +} Added: trunk/none/tests/x86/aad_aam.stderr.exp =================================================================== Added: trunk/none/tests/x86/aad_aam.stdout.exp =================================================================== --- trunk/none/tests/x86/aad_aam.stdout.exp (rev 0) +++ trunk/none/tests/x86/aad_aam.stdout.exp 2011-01-17 
12:34:33 UTC (rev 11502) @@ -0,0 +1,2 @@ +test begins +test completed Added: trunk/none/tests/x86/aad_aam.vgtest =================================================================== --- trunk/none/tests/x86/aad_aam.vgtest (rev 0) +++ trunk/none/tests/x86/aad_aam.vgtest 2011-01-17 12:34:33 UTC (rev 11502) @@ -0,0 +1,2 @@ +prog: aad_aam +vgopts: -q |
|
From: <sv...@va...> - 2011-01-17 12:32:35
|
Author: sewardj
Date: 2011-01-17 12:32:25 +0000 (Mon, 17 Jan 2011)
New Revision: 2078
Log:
Add support for AAD and AAM (base 10 only). Fixes #256387.
(Vince Weaver, vi...@cs...)
Modified:
trunk/priv/guest_x86_defs.h
trunk/priv/guest_x86_helpers.c
trunk/priv/guest_x86_toIR.c
Modified: trunk/priv/guest_x86_defs.h
===================================================================
--- trunk/priv/guest_x86_defs.h 2011-01-17 11:58:47 UTC (rev 2077)
+++ trunk/priv/guest_x86_defs.h 2011-01-17 12:32:25 UTC (rev 2078)
@@ -108,6 +108,8 @@
extern UInt x86g_calculate_daa_das_aaa_aas ( UInt AX_and_flags, UInt opcode );
+extern UInt x86g_calculate_aad_aam ( UInt AX_and_flags, UInt opcode );
+
extern ULong x86g_check_fldcw ( UInt fpucw );
extern UInt x86g_create_fpucw ( UInt fpround );
Modified: trunk/priv/guest_x86_helpers.c
===================================================================
--- trunk/priv/guest_x86_helpers.c 2011-01-17 11:58:47 UTC (rev 2077)
+++ trunk/priv/guest_x86_helpers.c 2011-01-17 12:32:25 UTC (rev 2078)
@@ -2109,7 +2109,52 @@
return result;
}
+UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
+{
+ UInt r_AL = (flags_and_AX >> 0) & 0xFF;
+ UInt r_AH = (flags_and_AX >> 8) & 0xFF;
+ UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
+ UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
+ UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
+ UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
+ UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
+ UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
+ UInt result = 0;
+ switch (opcode) {
+ case 0xD4: { /* AAM */
+ r_AH = r_AL / 10;
+ r_AL = r_AL % 10;
+ break;
+ }
+ case 0xD5: { /* AAD */
+ r_AL = ((r_AH * 10) + r_AL) & 0xff;
+ r_AH = 0;
+ break;
+ }
+ default:
+ vassert(0);
+ }
+
+ r_O = 0; /* let's say (undefined) */
+ r_C = 0; /* let's say (undefined) */
+ r_A = 0; /* let's say (undefined) */
+ r_S = (r_AL & 0x80) ? 1 : 0;
+ r_Z = (r_AL == 0) ? 1 : 0;
+ r_P = calc_parity_8bit( r_AL );
+
+ result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
+ | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
+ | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
+ | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
+ | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
+ | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
+ | ( (r_AH & 0xFF) << 8 )
+ | ( (r_AL & 0xFF) << 0 );
+ return result;
+}
+
+
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack. On non-x86 platforms, return 1. */
Modified: trunk/priv/guest_x86_toIR.c
===================================================================
--- trunk/priv/guest_x86_toIR.c 2011-01-17 11:58:47 UTC (rev 2077)
+++ trunk/priv/guest_x86_toIR.c 2011-01-17 12:32:25 UTC (rev 2078)
@@ -12907,26 +12907,52 @@
}
break;
-//-- case 0xD4: /* AAM */
-//-- case 0xD5: /* AAD */
-//-- d32 = getIByte(delta); delta++;
-//-- if (d32 != 10) VG_(core_panic)("disInstr: AAM/AAD but base not 10 !");
-//-- t1 = newTemp(cb);
-//-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1);
-//-- /* Widen %AX to 32 bits, so it's all defined when we push it. */
-//-- uInstr1(cb, WIDEN, 4, TempReg, t1);
-//-- uWiden(cb, 2, False);
-//-- uInstr0(cb, CALLM_S, 0);
-//-- uInstr1(cb, PUSH, 4, TempReg, t1);
-//-- uInstr1(cb, CALLM, 0, Lit16,
-//-- opc == 0xD4 ? VGOFF_(helper_AAM) : VGOFF_(helper_AAD) );
-//-- uFlagsRWU(cb, FlagsEmpty, FlagsSZP, FlagsEmpty);
-//-- uInstr1(cb, POP, 4, TempReg, t1);
-//-- uInstr0(cb, CALLM_E, 0);
-//-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX);
-//-- DIP(opc == 0xD4 ? "aam\n" : "aad\n");
-//-- break;
+ case 0xD4: /* AAM */
+ case 0xD5: /* AAD */
+ d32 = getIByte(delta); delta++;
+ if (sz != 4 || d32 != 10) goto decode_failure;
+ t1 = newTemp(Ity_I32);
+ t2 = newTemp(Ity_I32);
+ /* Make up a 32-bit value (t1), with the old value of AX in the
+ bottom 16 bits, and the old OSZACP bitmask in the upper 16
+ bits. */
+ assign(t1,
+ binop(Iop_16HLto32,
+ unop(Iop_32to16,
+ mk_x86g_calculate_eflags_all()),
+ getIReg(2, R_EAX)
+ ));
+ /* Call the helper fn, to get a new AX and OSZACP value, and
+ poke both back into the guest state. Also pass the helper
+ the actual opcode so it knows which of the 2 instructions it
+ is doing the computation for. */
+ assign(t2,
+ mkIRExprCCall(
+ Ity_I32, 0/*regparm*/, "x86g_calculate_aad_aam",
+ &x86g_calculate_aad_aam,
+ mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
+ ));
+ putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
+ mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
+ | X86G_CC_MASK_A | X86G_CC_MASK_Z
+ | X86G_CC_MASK_S| X86G_CC_MASK_O )
+ )
+ )
+ );
+ /* Set NDEP even though it isn't used. This makes
+ redundant-PUT elimination of previous stores to this field
+ work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+
+ DIP(opc == 0xD4 ? "aam\n" : "aad\n");
+ break;
+
/* ------------------------ CWD/CDQ -------------------- */
case 0x98: /* CBW */
|
|
From: <sv...@va...> - 2011-01-17 11:58:56
|
Author: sewardj
Date: 2011-01-17 11:58:47 +0000 (Mon, 17 Jan 2011)
New Revision: 2077
Log:
Print 8 insn bytes when failing, not 6.
Modified:
trunk/priv/guest_amd64_toIR.c
Modified: trunk/priv/guest_amd64_toIR.c
===================================================================
--- trunk/priv/guest_amd64_toIR.c 2011-01-17 10:32:18 UTC (rev 2076)
+++ trunk/priv/guest_amd64_toIR.c 2011-01-17 11:58:47 UTC (rev 2077)
@@ -18392,13 +18392,15 @@
decode_failure:
/* All decode failures end up here. */
vex_printf("vex amd64->IR: unhandled instruction bytes: "
- "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+ "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
(Int)getUChar(delta_start+0),
(Int)getUChar(delta_start+1),
(Int)getUChar(delta_start+2),
(Int)getUChar(delta_start+3),
(Int)getUChar(delta_start+4),
- (Int)getUChar(delta_start+5) );
+ (Int)getUChar(delta_start+5),
+ (Int)getUChar(delta_start+6),
+ (Int)getUChar(delta_start+7) );
/* Tell the dispatcher that this insn cannot be decoded, and so has
not been executed, and (is currently) the next to be executed.
|
|
From: <sv...@va...> - 2011-01-17 11:42:31
|
Author: sewardj Date: 2011-01-17 11:42:19 +0000 (Mon, 17 Jan 2011) New Revision: 11501 Log: Connect up tests for PCMPxSTRx to the build system. Added: trunk/none/tests/amd64/pcmpstr64.stderr.exp trunk/none/tests/amd64/pcmpstr64.stdout.exp trunk/none/tests/amd64/pcmpstr64.vgtest trunk/none/tests/amd64/pcmpxstrx64.stderr.exp trunk/none/tests/amd64/pcmpxstrx64.stdout.exp trunk/none/tests/amd64/pcmpxstrx64.vgtest Modified: trunk/none/tests/amd64/Makefile.am Modified: trunk/none/tests/amd64/Makefile.am =================================================================== --- trunk/none/tests/amd64/Makefile.am 2011-01-17 11:17:33 UTC (rev 11500) +++ trunk/none/tests/amd64/Makefile.am 2011-01-17 11:42:19 UTC (rev 11501) @@ -45,6 +45,10 @@ lzcnt64.stderr.exp lzcnt64.stdout.exp lzcnt64.vgtest \ nibz_bennee_mmap.stderr.exp nibz_bennee_mmap.stdout.exp \ nibz_bennee_mmap.vgtest \ + pcmpstr64.stderr.exp pcmpstr64.stdout.exp \ + pcmpstr64.vgtest \ + pcmpxstrx64.stderr.exp pcmpxstrx64.stdout.exp \ + pcmpxstrx64.vgtest \ rcl-amd64.vgtest rcl-amd64.stdout.exp rcl-amd64.stderr.exp \ redundantRexW.vgtest redundantRexW.stdout.exp \ redundantRexW.stderr.exp \ @@ -77,7 +81,7 @@ check_PROGRAMS += lzcnt64 endif if BUILD_SSE42_TESTS - check_PROGRAMS += sse4-64 + check_PROGRAMS += pcmpstr64 pcmpxstrx64 sse4-64 endif # DDD: these need to be made to work on Darwin like the x86/ ones were. 
Added: trunk/none/tests/amd64/pcmpstr64.stderr.exp =================================================================== Added: trunk/none/tests/amd64/pcmpstr64.stdout.exp =================================================================== --- trunk/none/tests/amd64/pcmpstr64.stdout.exp (rev 0) +++ trunk/none/tests/amd64/pcmpstr64.stdout.exp 2011-01-17 11:42:19 UTC (rev 11501) @@ -0,0 +1,206 @@ +istri 4A 0000000000000000 0000000000000000 -> 08c1000f 08c1000f +istri 4A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0801000f 0801000f +istri 4A aaaa2aaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0801000f 0801000f +istri 4A aaaaaaaaa2aaaaaa aaaaaaaaaaaaaaaa -> 0801000f 0801000f +istri 4A aaaaaaaaaaaaa2aa aaaaaaaaaaaaaaaa -> 0801000f 0801000f +istri 4A aaaaaaaaaaaaaaaa aaaa2aaaaaaaaaaa -> 0801000f 0801000f +istri 4A aaaaaaaaaaaaaaaa aaaaaaaaa2aaaaaa -> 0801000f 0801000f +istri 4A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaa2a -> 0801000f 0801000f +istri 4A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0801000f 0801000f +istri 4A baaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0801000e 0801000e +istri 4A b9aaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0801000d 0801000d +istri 4A b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0801000c 0801000c +istri 4A b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0801000c 0801000c +istri 4A b9baaaaaaaaaaaaa aaaaaaaaaaaa7aaa -> 0801000c 0801000c +istri 4A b9baaaaaaaaaaaaa aaaaaaaa2aaa4aaa -> 0801000c 0801000c +istri 4A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0801000f 0801000f +istri 4A aaaaaaaaaaaa0aaa aaaaaaaaaaaaaaaa -> 08410002 08410002 +istri 4A aaaaaaaaaaaaaaaa aaaaaaaaaaaa0aaa -> 08810002 08810002 +istri 4A aaaaaaaaaaaa0aaa aaaaaaaaaaaa0aaa -> 08c1000f 08c1000f +istri 4A aaaaaaaa0aaaaaaa aaaaaaaaaaaaaaaa -> 08410006 08410006 +istri 4A aaaaaaaaaaaaaaaa aaaaaaaaaaaa0aaa -> 08810002 08810002 +istri 4A aaaaaaaa0aaaaaaa aaaaaaaaaaaa0aaa -> 08c1000f 08c1000f +istri 4A aaaaaaaaaaaa0aaa aaaaaaaaaaaaaaaa -> 08410002 08410002 +istri 4A aaaaaaaaaaaaaaaa aaaaaaaa0aaaaaaa -> 08810006 08810006 +istri 4A aaaaaaaaaaaa0aaa aaaaaaaa0aaaaaaa 
-> 08c1000f 08c1000f +istri 4A 0000000000000000 aaaaaaaa0aaaaaaa -> 00c1000f 00c1000f +istri 4A 8000000000000000 aaaaaaaa0aaaaaaa -> 00c1000f 00c1000f +istri 4A 0000000000000001 aaaaaaaa0aaaaaaa -> 00c1000f 00c1000f +istri 4A 0000000000000000 aaaaaaaaaaaaaaaa -> 00400010 00400010 +istri 4A aaaaaaaaaaaaaaaa 0000000000000000 -> 00800010 00800010 +istri 3A 0000000000000000 0000000000000000 -> 08c10000 08c10000 +istri 3A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000010 00000010 +istri 3A aaaa2aaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0001000b 0001000b +istri 3A aaaaaaaaa2aaaaaa aaaaaaaaaaaaaaaa -> 00010006 00010006 +istri 3A aaaaaaaaaaaaa2aa aaaaaaaaaaaaaaaa -> 00010002 00010002 +istri 3A aaaaaaaaaaaaaaaa aaaa2aaaaaaaaaaa -> 0001000b 0001000b +istri 3A aaaaaaaaaaaaaaaa aaaaaaaaa2aaaaaa -> 00010006 00010006 +istri 3A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaa2a -> 00010001 00010001 +istri 3A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000010 00000010 +istri 3A baaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0001000f 0001000f +istri 3A b9aaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0001000e 0001000e +istri 3A b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0001000d 0001000d +istri 3A b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0001000d 0001000d +istri 3A b9baaaaaaaaaaaaa aaaaaaaaaaaa7aaa -> 00010003 00010003 +istri 3A b9baaaaaaaaaaaaa aaaaaaaa2aaa4aaa -> 00010003 00010003 +istri 3A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000010 00000010 +istri 3A aaaaaaaaaaaa0aaa aaaaaaaaaaaaaaaa -> 00400010 00400010 +istri 3A aaaaaaaaaaaaaaaa aaaaaaaaaaaa0aaa -> 00810003 00810003 +istri 3A aaaaaaaaaaaa0aaa aaaaaaaaaaaa0aaa -> 00c10003 00c10003 +istri 3A aaaaaaaa0aaaaaaa aaaaaaaaaaaaaaaa -> 00400010 00400010 +istri 3A aaaaaaaaaaaaaaaa aaaaaaaaaaaa0aaa -> 00810003 00810003 +istri 3A aaaaaaaa0aaaaaaa aaaaaaaaaaaa0aaa -> 00c10003 00c10003 +istri 3A aaaaaaaaaaaa0aaa aaaaaaaaaaaaaaaa -> 00400010 00400010 +istri 3A aaaaaaaaaaaaaaaa aaaaaaaa0aaaaaaa -> 00810007 00810007 +istri 3A aaaaaaaaaaaa0aaa aaaaaaaa0aaaaaaa -> 00c10007 00c10007 +istri 3A 0000000000000000 
aaaaaaaa0aaaaaaa -> 00c10007 00c10007 +istri 3A 8000000000000000 aaaaaaaa0aaaaaaa -> 00c10007 00c10007 +istri 3A 0000000000000001 aaaaaaaa0aaaaaaa -> 08c10000 08c10000 +istri 3A 0000000000000000 aaaaaaaaaaaaaaaa -> 00400010 00400010 +istri 3A aaaaaaaaaaaaaaaa 0000000000000000 -> 08810000 08810000 +istri 08 0000000000000000 0000000000000000 -> 08c10000 08c10000 +istri 08 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000 +istri 08 aaaa2aaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000 +istri 08 aaaaaaaaa2aaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000 +istri 08 aaaaaaaaaaaaa2aa aaaaaaaaaaaaaaaa -> 08010000 08010000 +istri 08 aaaaaaaaaaaaaaaa aaaa2aaaaaaaaaaa -> 08010000 08010000 +istri 08 aaaaaaaaaaaaaaaa aaaaaaaaa2aaaaaa -> 08010000 08010000 +istri 08 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaa2a -> 08010000 08010000 +istri 08 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000 +istri 08 baaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000 +istri 08 b9aaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000 +istri 08 b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000 +istri 08 b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000 +istri 08 b9baaaaaaaaaaaaa aaaaaaaaaaaa7aaa -> 08010000 08010000 +istri 08 b9baaaaaaaaaaaaa aaaaaaaa2aaa4aaa -> 08010000 08010000 +istri 08 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000 +istri 08 aaaaaaaaaaaa0aaa aaaaaaaaaaaaaaaa -> 08410000 08410000 +istri 08 aaaaaaaaaaaaaaaa aaaaaaaaaaaa0aaa -> 08810000 08810000 +istri 08 aaaaaaaaaaaa0aaa aaaaaaaaaaaa0aaa -> 08c10000 08c10000 +istri 08 aaaaaaaa0aaaaaaa aaaaaaaaaaaaaaaa -> 08410000 08410000 +istri 08 aaaaaaaaaaaaaaaa aaaaaaaaaaaa0aaa -> 08810000 08810000 +istri 08 aaaaaaaa0aaaaaaa aaaaaaaaaaaa0aaa -> 08c10000 08c10000 +istri 08 aaaaaaaaaaaa0aaa aaaaaaaaaaaaaaaa -> 08410000 08410000 +istri 08 aaaaaaaaaaaaaaaa aaaaaaaa0aaaaaaa -> 08810000 08810000 +istri 08 aaaaaaaaaaaa0aaa aaaaaaaa0aaaaaaa -> 08c10000 08c10000 +istri 08 0000000000000000 aaaaaaaa0aaaaaaa -> 00c10007 00c10007 +istri 08 
8000000000000000 aaaaaaaa0aaaaaaa -> 00c10007 00c10007 +istri 08 0000000000000001 aaaaaaaa0aaaaaaa -> 00c10007 00c10007 +istri 08 0000000000000000 aaaaaaaaaaaaaaaa -> 00400010 00400010 +istri 08 aaaaaaaaaaaaaaaa 0000000000000000 -> 00800010 00800010 +istri 1A 0000000000000000 0000000000000000 -> 00c00010 00c00010 +istri 1A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000010 00000010 +istri 1A aaaa2aaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0001000b 0001000b +istri 1A aaaaaaaaa2aaaaaa aaaaaaaaaaaaaaaa -> 00010006 00010006 +istri 1A aaaaaaaaaaaaa2aa aaaaaaaaaaaaaaaa -> 00010002 00010002 +istri 1A aaaaaaaaaaaaaaaa aaaa2aaaaaaaaaaa -> 0001000b 0001000b +istri 1A aaaaaaaaaaaaaaaa aaaaaaaaa2aaaaaa -> 00010006 00010006 +istri 1A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaa2a -> 00010001 00010001 +istri 1A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000010 00000010 +istri 1A baaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0001000f 0001000f +istri 1A b9aaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0001000e 0001000e +istri 1A b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0001000d 0001000d +istri 1A b9baaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0001000d 0001000d +istri 1A b9baaaaaaaaaaaaa aaaaaaaaaaaa7aaa -> 00010003 00010003 +istri 1A b9baaaaaaaaaaaaa aaaaaaaa2aaa4aaa -> 00010003 00010003 +istri 1A aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000010 00000010 +istri 1A aaaaaaaaaaaa0aaa aaaaaaaaaaaaaaaa -> 00410003 00410003 +istri 1A aaaaaaaaaaaaaaaa aaaaaaaaaaaa0aaa -> 00810003 00810003 +istri 1A aaaaaaaaaaaa0aaa aaaaaaaaaaaa0aaa -> 00c00010 00c00010 +istri 1A aaaaaaaa0aaaaaaa aaaaaaaaaaaaaaaa -> 00410007 00410007 +istri 1A aaaaaaaaaaaaaaaa aaaaaaaaaaaa0aaa -> 00810003 00810003 +istri 1A aaaaaaaa0aaaaaaa aaaaaaaaaaaa0aaa -> 00c10003 00c10003 +istri 1A aaaaaaaaaaaa0aaa aaaaaaaaaaaaaaaa -> 00410003 00410003 +istri 1A aaaaaaaaaaaaaaaa aaaaaaaa0aaaaaaa -> 00810007 00810007 +istri 1A aaaaaaaaaaaa0aaa aaaaaaaa0aaaaaaa -> 00c10003 00c10003 +istri 1A 0000000000000000 aaaaaaaa0aaaaaaa -> 08c10000 08c10000 +istri 1A 8000000000000000 aaaaaaaa0aaaaaaa -> 08c10000 
08c10000 +istri 1A 0000000000000001 aaaaaaaa0aaaaaaa -> 08c10000 08c10000 +istri 1A 0000000000000000 aaaaaaaaaaaaaaaa -> 08410000 08410000 +istri 1A aaaaaaaaaaaaaaaa 0000000000000000 -> 08810000 08810000 +istri 02 abcdacbdabcdabcd 000000000000000a -> 00810003 00810003 +istri 02 abcdabcdabcdabcd 000000000000000b -> 00810002 00810002 +istri 02 abcdabcdabcdabcd 00000000000000ab -> 00810002 00810002 +istri 02 abcdabc0abcdabcd 000000000000abcd -> 08c10000 08c10000 +istri 02 abcdabcdabcdabcd 000000000000abcd -> 08810000 08810000 +istri 02 0bcdabcdabcdabcd 000000000000abcd -> 08c10000 08c10000 +istri 02 abcdabcdabcda0cd 000000000000abcd -> 08c10000 08c10000 +istri 02 abcdabcdabcdab0d 000000000000abcd -> 08c10000 08c10000 +istri 02 abcdabcdabcdabc0 000000000000abcd -> 00c00010 00c00010 +istri 02 abcdabcdabcdabcd 000000000000abcd -> 08810000 08810000 +istri 02 abcdabcdabcdabcd 000000000000a0cd -> 08810000 08810000 +istri 02 abcdabcdabcdabcd 000000000000ab0d -> 08810000 08810000 +istri 02 abcdabcdabcdabcd 000000000000abc0 -> 00800010 00800010 +istri 02 0000000000000000 0000000000000000 -> 00c00010 00c00010 +istri 02 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 08010000 08010000 +istri 02 0000abcdabcdabcd 000000000000abcd -> 08c10000 08c10000 +istri 02 0000abcdabcdabcd 000000000000dcba -> 08c10000 08c10000 +istri 02 0000abcdabcdabcd 000000000000bbbb -> 00c10002 00c10002 +istri 02 0000abcdabcdabcd 000000000000baba -> 00c10002 00c10002 +istri 02 0000abcdabcdabcd 00000000000baba0 -> 00c00010 00c00010 +istri 02 0ddc0ffeebadf00d 00000000cafebabe -> 00c00010 00c00010 +istri 02 0ddc0ffeebadfeed 00000000cafebabe -> 00c10001 00c10001 +istri 0C 111111111abcde11 00000000000abcde -> 00810002 00810002 +istri 0C 111111111abcde11 0000abcde00abcde -> 00810002 00810002 +istri 0C 1111111111abcde1 00000000000abcde -> 00810001 00810001 +istri 0C 11111111111abcde 00000000000abcde -> 08810000 08810000 +istri 0C 111111111111abcd 00000000000abcde -> 00800010 00800010 +istri 0C 111abcde1abcde11 
00000000000abcde -> 00810002 00810002 +istri 0C 11abcde11abcde11 00000000000abcde -> 00810002 00810002 +istri 0C 1abcde111abcde11 00000000000abcde -> 00810002 00810002 +istri 0C abcde1111abcde11 00000000000abcde -> 00810002 00810002 +istri 0C bcde11111abcde11 00000000000abcde -> 00810002 00810002 +istri 0C cde111111abcde11 00000000000abcde -> 00810002 00810002 +istri 0C 01abcde11abcde11 00000000000abcde -> 00c10002 00c10002 +istri 0C 00abcde11abcde11 00000000000abcde -> 00c10002 00c10002 +istri 0C 000bcde11abcde11 00000000000abcde -> 00c10002 00c10002 +istri 0C 00abcde10abcde11 00000000000abcde -> 00c10002 00c10002 +istri 0C 00abcde100bcde11 00000000000abcde -> 00c00010 00c00010 +istri 0C 1111111111111234 0000000000000000 -> 08810000 08810000 +istri 0C 1111111111111234 0000000000000001 -> 00810003 00810003 +istri 0C 1111111111111234 0000000000000011 -> 00810003 00810003 +istri 0C 1111111111111234 1111111111111234 -> 08010000 08010000 +istri 0C a111111111111111 000000000000000a -> 0081000f 0081000f +istri 0C b111111111111111 000000000000000a -> 00800010 00800010 +istri 12 abcdacbdabcdabcd 000000000000000a -> 08810000 08810000 +istri 12 abcdabcdabcdabcd 000000000000000b -> 08810000 08810000 +istri 12 abcdabcdabcdabcd 00000000000000ab -> 08810000 08810000 +istri 12 abcdabc0abcdabcd 000000000000abcd -> 00c10008 00c10008 +istri 12 abcdabcdabcdabcd 000000000000abcd -> 00800010 00800010 +istri 12 0bcdabcdabcdabcd 000000000000abcd -> 00c1000f 00c1000f +istri 12 abcdabcdabcda0cd 000000000000abcd -> 00c10002 00c10002 +istri 12 abcdabcdabcdab0d 000000000000abcd -> 00c10001 00c10001 +istri 12 abcdabcdabcdabc0 000000000000abcd -> 08c10000 08c10000 +istri 12 abcdabcdabcdabcd 000000000000abcd -> 00800010 00800010 +istri 12 abcdabcdabcdabcd 000000000000a0cd -> 00810002 00810002 +istri 12 abcdabcdabcdabcd 000000000000ab0d -> 00810001 00810001 +istri 12 abcdabcdabcdabcd 000000000000abc0 -> 08810000 08810000 +istri 12 0000000000000000 0000000000000000 -> 08c10000 08c10000 +istri 12 
aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000010 00000010 +istri 12 0000abcdabcdabcd 000000000000abcd -> 00c1000c 00c1000c +istri 12 0000abcdabcdabcd 000000000000dcba -> 00c1000c 00c1000c +istri 12 0000abcdabcdabcd 000000000000bbbb -> 08c10000 08c10000 +istri 12 0000abcdabcdabcd 000000000000baba -> 08c10000 08c10000 +istri 12 0000abcdabcdabcd 00000000000baba0 -> 08c10000 08c10000 +istri 12 0ddc0ffeebadf00d 00000000cafebabe -> 08c10000 08c10000 +istri 12 0ddc0ffeebadfeed 00000000cafebabe -> 08c10000 08c10000 +istri 44 aaaabbbbccccdddd 00000000000000bc -> 00800010 00800010 +istri 44 aaaabbbbccccdddd 00000000000000cb -> 0081000b 0081000b +istri 44 baaabbbbccccdddd 00000000000000cb -> 0081000f 0081000f +istri 44 baaabbbbccccdddc 00000000000000cb -> 0881000f 0881000f +istri 44 bbbbbbbbbbbbbbbb 00000000000000cb -> 0881000f 0881000f +istri 44 bbbbbbbb0bbbbbbb 00000000000000cb -> 08c10006 08c10006 +istri 44 bbbbbbbbbbbbbb0b 00000000000000cb -> 08c10000 08c10000 +istri 44 bbbbbbbbbbbbbbb0 00000000000000cb -> 00c00010 00c00010 +istri 44 0000000000000000 00000000000000cb -> 00c00010 00c00010 +istri 44 0000000000000000 0000000000000000 -> 00c00010 00c00010 +istri 44 bbbbbbbbbbbbbbbb 00000000000000cb -> 0881000f 0881000f +istri 44 bbbbbbbbbbbbbbbb 000000000000000b -> 00800010 00800010 +istri 44 b4b4b4b4b4b4b4b4 00000000000062cb -> 0881000f 0881000f +istri 44 b4b4b4b4b4b4b4b4 00000000000002cb -> 0081000f 0081000f +istri 44 b4b4b4b4b4b4b4b4 00000000000000cb -> 0081000f 0081000f +istri 44 b4b4b4b4b4b4b4b4 000000000000000b -> 00800010 00800010 +istri 44 0123456789abcdef 000000fecb975421 -> 08c1000e 08c1000e +istri 44 123456789abcdef1 000000fecb975421 -> 0881000f 0881000f +istri 44 0123456789abcdef 00000000dca86532 -> 00c1000d 00c1000d +istri 44 123456789abcdef1 00000000dca86532 -> 0081000e 0081000e Added: trunk/none/tests/amd64/pcmpstr64.vgtest =================================================================== --- trunk/none/tests/amd64/pcmpstr64.vgtest (rev 0) +++ 
trunk/none/tests/amd64/pcmpstr64.vgtest 2011-01-17 11:42:19 UTC (rev 11501) @@ -0,0 +1,3 @@ +prog: pcmpstr64 +prereq: ../../../tests/x86_amd64_features amd64-sse42 +vgopts: -q Added: trunk/none/tests/amd64/pcmpxstrx64.stderr.exp =================================================================== Added: trunk/none/tests/amd64/pcmpxstrx64.stdout.exp =================================================================== --- trunk/none/tests/amd64/pcmpxstrx64.stdout.exp (rev 0) +++ trunk/none/tests/amd64/pcmpxstrx64.stdout.exp 2011-01-17 11:42:19 UTC (rev 11501) @@ -0,0 +1,210 @@ + +rdx 0000000000000000 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000000 argR aaaaaaaaaaaaaaaa00aaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550006 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 000000000000000000ffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 0000000000000000000000000000007f rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000008c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1 + estrm $0x4A: xmm0 ffffffffffffffffffffffffffffffff rcx 5555555555555555 flags 000008c1 + estrm $0x0A: xmm0 0000000000000000000000000000ffff rcx 5555555555555555 flags 000008c1 + +rdx 0000000000000000 argL 00000000000000000000000000000000 rax 0000000000000000 argR aaaaaaaaaaaaaaaa00aaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000000c1 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 000000c1 + istrm $0x4A: xmm0 ffffffffffffffffff00000000000000 rcx 5555555555555555 flags 000000c1 + istrm $0x0A: xmm0 0000000000000000000000000000ff80 rcx 5555555555555555 flags 000000c1 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000008c1 + estri $0x0A: xmm0 
55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1 + estrm $0x4A: xmm0 ffffffffffffffffffffffffffffffff rcx 5555555555555555 flags 000008c1 + estrm $0x0A: xmm0 0000000000000000000000000000ffff rcx 5555555555555555 flags 000008c1 + +rdx 0000000000000000 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000000 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000008c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1 + estrm $0x4A: xmm0 ffffffffffffffffffffffffffffffff rcx 5555555555555555 flags 000008c1 + estrm $0x0A: xmm0 0000000000000000000000000000ffff rcx 5555555555555555 flags 000008c1 + +rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000000 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000000c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550005 flags 000000c1 + estrm $0x4A: xmm0 ffffffffffffffffffffff0000000000 rcx 5555555555555555 flags 000000c1 + estrm $0x0A: xmm0 0000000000000000000000000000ffe0 rcx 5555555555555555 flags 000000c1 + +rdx 0000000000000000 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 
argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000000c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550006 flags 000000c1 + estrm $0x4A: xmm0 ffffffffffffffffffff000000000000 rcx 5555555555555555 flags 000000c1 + estrm $0x0A: xmm0 0000000000000000000000000000ffc0 rcx 5555555555555555 flags 000000c1 + +rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000008c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1 + estrm $0x4A: xmm0 ffffffffffffffffffff00ffffffffff rcx 5555555555555555 flags 000008c1 + estrm $0x0A: xmm0 0000000000000000000000000000ffdf rcx 5555555555555555 flags 000008c1 + +rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 000000000000000f argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 
000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000008c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1 + estrm $0x4A: xmm0 ff00000000000000000000ffffffffff rcx 5555555555555555 flags 000008c1 + estrm $0x0A: xmm0 0000000000000000000000000000801f rcx 5555555555555555 flags 000008c1 + +rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000010 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550004 flags 00000881 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + estrm $0x4A: xmm0 0000000000000000000000ffffffffff rcx 5555555555555555 flags 00000881 + estrm $0x0A: xmm0 0000000000000000000000000000001f rcx 5555555555555555 flags 00000881 + +rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000011 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550004 flags 00000881 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + estrm $0x4A: xmm0 0000000000000000000000ffffffffff rcx 5555555555555555 flags 
00000881 + estrm $0x0A: xmm0 0000000000000000000000000000001f rcx 5555555555555555 flags 00000881 + +rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax fffffffffffffffa argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000008c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1 + estrm $0x4A: xmm0 ffffffffffffffffffff00ffffffffff rcx 5555555555555555 flags 000008c1 + estrm $0x0A: xmm0 0000000000000000000000000000ffdf rcx 5555555555555555 flags 000008c1 + +rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax fffffffffffffff1 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000008c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1 + estrm $0x4A: xmm0 ff00000000000000000000ffffffffff rcx 5555555555555555 flags 000008c1 + estrm $0x0A: xmm0 0000000000000000000000000000801f rcx 5555555555555555 flags 000008c1 + +rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax fffffffffffffff0 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 
55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550004 flags 00000881 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + estrm $0x4A: xmm0 0000000000000000000000ffffffffff rcx 5555555555555555 flags 00000881 + estrm $0x0A: xmm0 0000000000000000000000000000001f rcx 5555555555555555 flags 00000881 + +rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax ffffffffffffffef argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550004 flags 00000881 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + estrm $0x4A: xmm0 0000000000000000000000ffffffffff rcx 5555555555555555 flags 00000881 + estrm $0x0A: xmm0 0000000000000000000000000000001f rcx 5555555555555555 flags 00000881 + +rdx 0000000000000005 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 
000008c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1 + estrm $0x4A: xmm0 ffffffffffffffffffff00ffffffffff rcx 5555555555555555 flags 000008c1 + estrm $0x0A: xmm0 0000000000000000000000000000ffdf rcx 5555555555555555 flags 000008c1 + +rdx 000000000000000f argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000008c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1 + estrm $0x4A: xmm0 ff000000000000000000ffffffffffff rcx 5555555555555555 flags 000008c1 + estrm $0x0A: xmm0 0000000000000000000000000000803f rcx 5555555555555555 flags 000008c1 + +rdx 0000000000000010 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550005 flags 00000841 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000841 + estrm $0x4A: xmm0 00000000000000000000ffffffffffff rcx 5555555555555555 flags 00000841 + estrm $0x0A: xmm0 0000000000000000000000000000003f rcx 5555555555555555 flags 00000841 + +rdx 0000000000000011 argL 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000881 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000881 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000881 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000881 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550005 flags 00000841 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000841 + estrm $0x4A: xmm0 00000000000000000000ffffffffffff rcx 5555555555555555 flags 00000841 + estrm $0x0A: xmm0 0000000000000000000000000000003f rcx 5555555555555555 flags 00000841 + +rdx fffffffffffffffb argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000801 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000801 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000801 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000801 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000008c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1 + estrm $0x4A: xmm0 ffffffffffffffffffff00ffffffffff rcx 5555555555555555 flags 000008c1 + estrm $0x0A: xmm0 0000000000000000000000000000ffdf rcx 5555555555555555 flags 000008c1 + +rdx fffffffffffffff1 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000801 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000801 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 
5555555555555555 flags 00000801 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000801 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 555555555555000f flags 000008c1 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 000008c1 + estrm $0x4A: xmm0 ff000000000000000000ffffffffffff rcx 5555555555555555 flags 000008c1 + estrm $0x0A: xmm0 0000000000000000000000000000803f rcx 5555555555555555 flags 000008c1 + +rdx fffffffffffffff0 argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000801 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000801 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000801 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000801 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550005 flags 00000841 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000841 + estrm $0x4A: xmm0 00000000000000000000ffffffffffff rcx 5555555555555555 flags 00000841 + estrm $0x0A: xmm0 0000000000000000000000000000003f rcx 5555555555555555 flags 00000841 + +rdx ffffffffffffffef argL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa rax 0000000000000006 argR aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + istri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550007 flags 00000801 + istri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000801 + istrm $0x4A: xmm0 0000000000000000ffffffffffffffff rcx 5555555555555555 flags 00000801 + istrm $0x0A: xmm0 000000000000000000000000000000ff rcx 5555555555555555 flags 00000801 + estri $0x4A: xmm0 55555555555555555555555555555555 rcx 5555555555550005 flags 00000841 + estri $0x0A: xmm0 55555555555555555555555555555555 rcx 5555555555550000 flags 00000841 + estrm $0x4A: xmm0 
00000000000000000000ffffffffffff rcx 5555555555555555 flags 00000841 + estrm $0x0A: xmm0 0000000000000000000000000000003f rcx 5555555555555555 flags 00000841 Added: trunk/none/tests/amd64/pcmpxstrx64.vgtest =================================================================== --- trunk/none/tests/amd64/pcmpxstrx64.vgtest (rev 0) +++ trunk/none/tests/amd64/pcmpxstrx64.vgtest 2011-01-17 11:42:19 UTC (rev 11501) @@ -0,0 +1,3 @@ +prog: pcmpxstrx64 +prereq: ../../../tests/x86_amd64_features amd64-sse42 +vgopts: -q |
|
From: <sv...@va...> - 2011-01-17 11:17:41
|
Author: sewardj Date: 2011-01-17 11:17:33 +0000 (Mon, 17 Jan 2011) New Revision: 11500 Log: Connect up sse4 tests to the build system. Added: trunk/none/tests/amd64/sse4-64.stderr.exp trunk/none/tests/amd64/sse4-64.stdout.exp trunk/none/tests/amd64/sse4-64.vgtest Modified: trunk/none/tests/amd64/Makefile.am [... diff too large to include ...] |
|
From: <sv...@va...> - 2011-01-17 11:15:57
|
Author: sewardj
Date: 2011-01-17 11:15:48 +0000 (Mon, 17 Jan 2011)
New Revision: 11499
Log:
Add build system goop for testing SSE4.2 instructions.
Modified:
trunk/configure.in
trunk/tests/x86_amd64_features.c
Modified: trunk/configure.in
===================================================================
--- trunk/configure.in 2011-01-17 10:40:53 UTC (rev 11498)
+++ trunk/configure.in 2011-01-17 11:15:48 UTC (rev 11499)
@@ -1410,10 +1410,9 @@
AM_CONDITIONAL(BUILD_SSSE3_TESTS, test x$ac_have_as_ssse3 = xyes)
-# Note: we're really checking the assembler-level support, not gcc's ;
-# C-level code might require the flag -mpclmul be passed to gcc (e.g. to
-# compile code which uses wmmintrin.h). Doesn't matter since tests also
-# use inline assembly directly
+# does the x86/amd64 assembler understand the PCLMULQDQ instruction?
+# Note, this doesn't generate a C-level symbol. It generates an
+# automake-level symbol (BUILD_PCLMULQDQ_TESTS), used in test Makefile.am's
AC_MSG_CHECKING([if x86/amd64 assembler supports 'pclmulqdq'])
AC_TRY_COMPILE(, [
do {
@@ -1432,6 +1431,9 @@
AM_CONDITIONAL(BUILD_PCLMULQDQ_TESTS, test x$ac_have_as_pclmulqdq = xyes)
+# does the x86/amd64 assembler understand the LZCNT instruction?
+# Note, this doesn't generate a C-level symbol. It generates an
+# automake-level symbol (BUILD_LZCNT_TESTS), used in test Makefile.am's
AC_MSG_CHECKING([if x86/amd64 assembler supports 'lzcnt'])
AC_TRY_COMPILE([], [
@@ -1449,6 +1451,29 @@
AM_CONDITIONAL([BUILD_LZCNT_TESTS], [test x$ac_have_as_lzcnt = xyes])
+
+# does the x86/amd64 assembler understand SSE 4.2 instructions?
+# Note, this doesn't generate a C-level symbol. It generates an
+# automake-level symbol (BUILD_SSE42_TESTS), used in test Makefile.am's
+AC_MSG_CHECKING([if x86/amd64 assembler speaks SSE4.2])
+
+AC_TRY_COMPILE(, [
+ do { long long int x;
+ __asm__ __volatile__(
+ "crc32q %%r15,%%r15" : : : "r15" ); }
+ while (0)
+],
+[
+ac_have_as_sse42=yes
+AC_MSG_RESULT([yes])
+], [
+ac_have_as_sse42=no
+AC_MSG_RESULT([no])
+])
+
+AM_CONDITIONAL(BUILD_SSE42_TESTS, test x$ac_have_as_sse42 = xyes)
+
+
# XXX JRS 2010 Oct 13: what is this for? For sure, we don't need this
# when building the tool executables. I think we should get rid of it.
#
Modified: trunk/tests/x86_amd64_features.c
===================================================================
--- trunk/tests/x86_amd64_features.c 2011-01-17 10:40:53 UTC (rev 11498)
+++ trunk/tests/x86_amd64_features.c 2011-01-17 11:15:48 UTC (rev 11499)
@@ -92,6 +92,9 @@
level = 0x80000001;
cmask = 1 << 5;
require_amd = True;
+ } else if ( strcmp( cpu, "amd64-sse42" ) == 0 ) {
+ level = 1;
+ cmask = 1 << 20;
#endif
} else {
return 2; // Unrecognised feature.
|
|
From: <sv...@va...> - 2011-01-17 10:41:02
|
Author: sewardj
Date: 2011-01-17 10:40:53 +0000 (Mon, 17 Jan 2011)
New Revision: 11498
Log:
Add test cases for EXTRACTPS, BLENDVPD, BLENDVPS, PBLENDVB.
Modified:
trunk/none/tests/amd64/sse4-64.c
Modified: trunk/none/tests/amd64/sse4-64.c
===================================================================
--- trunk/none/tests/amd64/sse4-64.c 2011-01-14 18:48:43 UTC (rev 11497)
+++ trunk/none/tests/amd64/sse4-64.c 2011-01-17 10:40:53 UTC (rev 11498)
@@ -1726,6 +1726,17 @@
}
+void test_EXTRACTPS ( void )
+{
+ V128 src;
+ randV128(&src);
+ DO_imm_r_to_mandrscalar("extractps", 0, src, "d");
+ DO_imm_r_to_mandrscalar("extractps", 1, src, "d");
+ DO_imm_r_to_mandrscalar("extractps", 2, src, "d");
+ DO_imm_r_to_mandrscalar("extractps", 3, src, "d");
+}
+
+
void test_PHMINPOSUW ( void )
{
V128 src, dst;
@@ -3533,23 +3544,226 @@
}
}
+/* ------------ PBLENDVB ------------ */
+
+void do_PBLENDVB ( Bool mem, V128* xmm0, V128* src, /*MOD*/V128* dst )
+{
+ if (mem) {
+ __asm__ __volatile__(
+ "movupd (%2), %%xmm0" "\n\t"
+ "movupd (%1), %%xmm11" "\n\t"
+ "pblendvb (%0), %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst), "r"(xmm0)
+ : /*TRASH*/ "xmm11","xmm0"
+ );
+ } else {
+ __asm__ __volatile__(
+ "movupd (%2), %%xmm0" "\n\t"
+ "movupd (%1), %%xmm11" "\n\t"
+ "movupd (%0), %%xmm2" "\n\t"
+ "pblendvb %%xmm2, %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst), "r"(xmm0)
+ : /*TRASH*/ "xmm11","xmm2","xmm0"
+ );
+ }
+}
+
+void test_PBLENDVB ( void )
+{
+ V128 xmm0, src, dst, t_xmm0, t_src, t_dst;
+ Int i;
+ for (i = 0; i < 10; i++) {
+ randV128(&t_xmm0);
+ randV128(&t_src);
+ randV128(&t_dst);
+
+ memcpy(&xmm0, &t_xmm0, 16);
+ memcpy(&src, &t_src, 16);
+ memcpy(&dst, &t_dst, 16);
+ do_PBLENDVB(False/*reg*/, &xmm0, &src, &dst);
+ printf("r pblendvb ");
+ showV128(&t_xmm0);
+ printf(" ");
+ showV128(&t_src);
+ printf(" ");
+ showV128(&t_dst);
+ printf(" -> ");
+ showV128(&dst);
+ printf("\n");
+
+ memcpy(&xmm0, &t_xmm0, 16);
+ memcpy(&src, &t_src, 16);
+ memcpy(&dst, &t_dst, 16);
+ do_PBLENDVB(True/*mem*/, &xmm0, &src, &dst);
+ printf("m pblendvb ");
+ showV128(&t_xmm0);
+ printf(" ");
+ showV128(&t_src);
+ printf(" ");
+ showV128(&t_dst);
+ printf(" -> ");
+ showV128(&dst);
+ printf("\n");
+ }
+}
+
+/* ------------ BLENDVPD ------------ */
+
+void do_BLENDVPD ( Bool mem, V128* xmm0, V128* src, /*MOD*/V128* dst )
+{
+ if (mem) {
+ __asm__ __volatile__(
+ "movupd (%2), %%xmm0" "\n\t"
+ "movupd (%1), %%xmm11" "\n\t"
+ "blendvpd (%0), %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst), "r"(xmm0)
+ : /*TRASH*/ "xmm11","xmm0"
+ );
+ } else {
+ __asm__ __volatile__(
+ "movupd (%2), %%xmm0" "\n\t"
+ "movupd (%1), %%xmm11" "\n\t"
+ "movupd (%0), %%xmm2" "\n\t"
+ "blendvpd %%xmm2, %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst), "r"(xmm0)
+ : /*TRASH*/ "xmm11","xmm2","xmm0"
+ );
+ }
+}
+
+void test_BLENDVPD ( void )
+{
+ V128 xmm0, src, dst, t_xmm0, t_src, t_dst;
+ Int i;
+ for (i = 0; i < 10; i++) {
+ randV128(&t_xmm0);
+ randV128(&t_src);
+ randV128(&t_dst);
+
+ memcpy(&xmm0, &t_xmm0, 16);
+ memcpy(&src, &t_src, 16);
+ memcpy(&dst, &t_dst, 16);
+ do_BLENDVPD(False/*reg*/, &xmm0, &src, &dst);
+ printf("r blendvpd ");
+ showV128(&t_xmm0);
+ printf(" ");
+ showV128(&t_src);
+ printf(" ");
+ showV128(&t_dst);
+ printf(" -> ");
+ showV128(&dst);
+ printf("\n");
+
+ memcpy(&xmm0, &t_xmm0, 16);
+ memcpy(&src, &t_src, 16);
+ memcpy(&dst, &t_dst, 16);
+ do_BLENDVPD(True/*mem*/, &xmm0, &src, &dst);
+ printf("m blendvpd ");
+ showV128(&t_xmm0);
+ printf(" ");
+ showV128(&t_src);
+ printf(" ");
+ showV128(&t_dst);
+ printf(" -> ");
+ showV128(&dst);
+ printf("\n");
+ }
+}
+
+/* ------------ BLENDVPS ------------ */
+
+void do_BLENDVPS ( Bool mem, V128* xmm0, V128* src, /*MOD*/V128* dst )
+{
+ if (mem) {
+ __asm__ __volatile__(
+ "movupd (%2), %%xmm0" "\n\t"
+ "movupd (%1), %%xmm11" "\n\t"
+ "blendvps (%0), %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst), "r"(xmm0)
+ : /*TRASH*/ "xmm11","xmm0"
+ );
+ } else {
+ __asm__ __volatile__(
+ "movupd (%2), %%xmm0" "\n\t"
+ "movupd (%1), %%xmm11" "\n\t"
+ "movupd (%0), %%xmm2" "\n\t"
+ "blendvps %%xmm2, %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst), "r"(xmm0)
+ : /*TRASH*/ "xmm11","xmm2","xmm0"
+ );
+ }
+}
+
+void test_BLENDVPS ( void )
+{
+ V128 xmm0, src, dst, t_xmm0, t_src, t_dst;
+ Int i;
+ for (i = 0; i < 10; i++) {
+ randV128(&t_xmm0);
+ randV128(&t_src);
+ randV128(&t_dst);
+
+ memcpy(&xmm0, &t_xmm0, 16);
+ memcpy(&src, &t_src, 16);
+ memcpy(&dst, &t_dst, 16);
+ do_BLENDVPS(False/*reg*/, &xmm0, &src, &dst);
+ printf("r blendvps ");
+ showV128(&t_xmm0);
+ printf(" ");
+ showV128(&t_src);
+ printf(" ");
+ showV128(&t_dst);
+ printf(" -> ");
+ showV128(&dst);
+ printf("\n");
+
+ memcpy(&xmm0, &t_xmm0, 16);
+ memcpy(&src, &t_src, 16);
+ memcpy(&dst, &t_dst, 16);
+ do_BLENDVPS(True/*mem*/, &xmm0, &src, &dst);
+ printf("m blendvps ");
+ showV128(&t_xmm0);
+ printf(" ");
+ showV128(&t_src);
+ printf(" ");
+ showV128(&t_dst);
+ printf(" -> ");
+ showV128(&dst);
+ printf("\n");
+ }
+}
+
+/* ------------ main ------------ */
+
int main ( int argc, char** argv )
{
#if 1
// ------ SSE 4.1 ------
test_BLENDPD(); // done Apr.01.2010
test_BLENDPS(); // done Apr.02.2010
- //test_PBLENDW();
- // BLENDVPD
- // BLENDVPS
+ test_PBLENDW();
+ test_PBLENDVB();
+ test_BLENDVPD();
+ test_BLENDVPS();
test_DPPD(); // done Apr.08.2010
test_DPPS(); // done Apr.09.2010
- // EXTRACTPS
+ test_EXTRACTPS();
test_INSERTPS(); // done Apr.01.2010
- // MOVNTDQA
+ // MOVNTDQA ***
//test_MPSADBW();
//test_PACKUSDW();
- // PBLENDVB
//test_PCMPEQQ();
test_PEXTRB(); // done Apr.15.2010
test_PEXTRD(); // done Apr.14.2010
@@ -3557,7 +3771,7 @@
test_PEXTRW(); // done Apr.14.2010
test_PINSRQ(); // done Apr.16.2010
test_PINSRD(); // todo
- //test_PINSRW(); // todo
+ test_PINSRW(); /* Umm, this is SSE2, not SSE4. Right? */
test_PINSRB(); // todo
//test_PHMINPOSUW();
test_PMAXSB();
@@ -3596,9 +3810,17 @@
test_ROUNDPS_w_mxcsr_rounding();
// ------ SSE 4.2 ------
test_PCMPGTQ();
+ // CRC32B,Q
+
#else
- test_PTEST();
+#if 0
+ test_MPSADBW();
+ test_PACKUSDW();
+ test_PCMPEQQ();
+ test_PHMINPOSUW();
+ test_PMULDQ();
#endif
+#endif
return 0;
|
|
From: <sv...@va...> - 2011-01-17 10:32:27
|
Author: sewardj
Date: 2011-01-17 10:32:18 +0000 (Mon, 17 Jan 2011)
New Revision: 2076
Log:
Implement SSE4.x EXTRACTPS. Fixes #258870.
Implement SSE4.x BLENDVPD, BLENDVPS, PBLENDVB. Fixes #256968 + dup #263376.
Modified:
trunk/priv/guest_amd64_toIR.c
trunk/priv/host_amd64_isel.c
trunk/priv/host_generic_simd128.c
trunk/priv/host_generic_simd128.h
Modified: trunk/priv/guest_amd64_toIR.c
===================================================================
--- trunk/priv/guest_amd64_toIR.c 2011-01-11 19:55:39 UTC (rev 2075)
+++ trunk/priv/guest_amd64_toIR.c 2011-01-17 10:32:18 UTC (rev 2076)
@@ -14643,6 +14643,55 @@
goto decode_success;
}
+
+ /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
+ float from xmm reg and store in gen.reg or mem. This is
+ identical to PEXTRD, except that REX.W appears to be ignored.
+ */
+ if ( have66noF2noF3( pfx )
+ && sz == 2 /* REX.W == 0; perhaps too strict? */
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x17 ) {
+
+ Int imm8_10;
+ IRTemp xmm_vec = newTemp(Ity_V128);
+ IRTemp src_dword = newTemp(Ity_I32);
+
+ modrm = insn[3];
+ assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
+ breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
+
+ if ( epartIsReg( modrm ) ) {
+ imm8_10 = (Int)(insn[3+1] & 3);
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
+ imm8_10 = (Int)(insn[3+alen] & 3);
+ }
+
+ switch ( imm8_10 ) {
+ case 0: assign( src_dword, mkexpr(t0) ); break;
+ case 1: assign( src_dword, mkexpr(t1) ); break;
+ case 2: assign( src_dword, mkexpr(t2) ); break;
+ case 3: assign( src_dword, mkexpr(t3) ); break;
+ default: vassert(0);
+ }
+
+ if ( epartIsReg( modrm ) ) {
+ putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
+ delta += 3+1+1;
+ DIP( "extractps $%d, %s,%s\n", imm8_10,
+ nameXMMReg( gregOfRexRM(pfx, modrm) ),
+ nameIReg32( eregOfRexRM(pfx, modrm) ) );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(src_dword) );
+ delta += 3+alen+1;
+ DIP( "extractps $%d, %s,%s\n",
+ imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
+ }
+
+ goto decode_success;
+ }
+
+
/* 66 0F 38 37 = PCMPGTQ
64x2 comparison (signed, presumably; the Intel docs don't say :-)
*/
@@ -15731,7 +15780,75 @@
goto decode_success;
}
+ /* 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
+ 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
+ 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
+ Blend at various granularities, with XMM0 (implicit operand)
+ providing the controlling mask.
+ */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x15 || insn[2] == 0x14 || insn[2] == 0x10)) {
+ modrm = insn[3];
+ HChar* nm = NULL;
+ UInt gran = 0;
+ IROp opSAR = Iop_INVALID;
+ switch (insn[2]) {
+ case 0x15:
+ nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
+ break;
+ case 0x14:
+ nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
+ break;
+ case 0x10:
+ nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
+ break;
+ }
+ vassert(nm);
+
+ IRTemp vecE = newTemp(Ity_V128);
+ IRTemp vecG = newTemp(Ity_V128);
+ IRTemp vec0 = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
+ delta += 3+1;
+ DIP( "%s %s,%s\n", nm,
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
+ delta += 3+alen;
+ DIP( "%s %s,%s\n", nm,
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
+ assign(vec0, getXMMReg(0));
+
+ /* Now the tricky bit is to convert vec0 into a suitable mask,
+ by copying the most significant bit of each lane into all
+ positions in the lane. */
+ IRTemp sh = newTemp(Ity_I8);
+ assign(sh, mkU8(8 * gran - 1));
+
+ IRTemp mask = newTemp(Ity_V128);
+ assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));
+
+ IRTemp notmask = newTemp(Ity_V128);
+ assign(notmask, unop(Iop_NotV128, mkexpr(mask)));
+
+ IRExpr* res = binop(Iop_OrV128,
+ binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
+ binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask)));
+ putXMMReg(gregOfRexRM(pfx, modrm), res);
+
+ goto decode_success;
+ }
+
/* ---------------------------------------------------- */
/* --- end of the SSE4 decoder --- */
/* ---------------------------------------------------- */
Modified: trunk/priv/host_amd64_isel.c
===================================================================
--- trunk/priv/host_amd64_isel.c 2011-01-11 19:55:39 UTC (rev 2075)
+++ trunk/priv/host_amd64_isel.c 2011-01-17 10:32:18 UTC (rev 2076)
@@ -3679,6 +3679,54 @@
return dst;
}
+ case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2;
+ goto do_SseAssistedVectorAndScalar;
+ case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16;
+ goto do_SseAssistedVectorAndScalar;
+ do_SseAssistedVectorAndScalar: {
+ /* RRRufff! RRRufff code is what we're generating here. Oh
+ well. */
+ vassert(fn != 0);
+ HReg dst = newVRegV(env);
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg argp = newVRegI(env);
+ /* subq $112, %rsp -- make a space*/
+ sub_from_rsp(env, 112);
+ /* leaq 48(%rsp), %r_argp -- point into it */
+ addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
+ argp));
+ /* andq $-16, %r_argp -- 16-align the pointer */
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
+ AMD64RMI_Imm( ~(UInt)15 ),
+ argp));
+ /* Prepare 2 vector arg regs:
+ leaq 0(%r_argp), %rdi
+ leaq 16(%r_argp), %rsi
+ */
+ addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
+ hregAMD64_RDI()));
+ addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
+ hregAMD64_RSI()));
+ /* Store the vector arg, at (%rsi):
+ movupd %argL, 0(%rsi)
+ */
+ addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
+ AMD64AMode_IR(0, hregAMD64_RSI())));
+ /* And get the scalar value into rdx */
+ addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX()));
+
+ /* call the helper */
+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3 ));
+ /* fetch the result from memory, using %r_argp, which the
+ register allocator will keep alive across the call. */
+ addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
+ AMD64AMode_IR(0, argp)));
+ /* and finally, clear the space */
+ add_to_rsp(env, 112);
+ return dst;
+ }
+
default:
break;
} /* switch (e->Iex.Binop.op) */
Modified: trunk/priv/host_generic_simd128.c
===================================================================
--- trunk/priv/host_generic_simd128.c 2011-01-11 19:55:39 UTC (rev 2075)
+++ trunk/priv/host_generic_simd128.c 2011-01-17 10:32:18 UTC (rev 2076)
@@ -94,6 +94,16 @@
? 0xFFFFFFFFFFFFFFFFULL : 0ULL;
}
+static inline ULong sar64 ( ULong v, UInt n )
+{
+ return ((Long)v) >> n;
+}
+
+static inline UChar sar8 ( UChar v, UInt n )
+{
+ return toUChar(((Char)v) >> n);
+}
+
void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
@@ -214,7 +224,45 @@
res->w64[1] = cmpGT64S(argL->w64[1], argR->w64[1]);
}
+/* ------------ Shifting ------------ */
+/* Note that because these primops are undefined if the shift amount
+ equals or exceeds the lane width, the shift amount is masked so
+ that the scalar shifts are always in range. In fact, given the
+ semantics of these primops (SarN64x2, etc.) it is an error if
+ we are ever given an out-of-range shift amount.
+*/
+void h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
+ V128* argL, UInt nn)
+{
+ /* vassert(nn < 64); */
+ nn &= 63;
+ res->w64[0] = sar64(argL->w64[0], nn);
+ res->w64[1] = sar64(argL->w64[1], nn);
+}
+void h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
+ V128* argL, UInt nn)
+{
+ /* vassert(nn < 8); */
+ nn &= 7;
+ res->w8[ 0] = sar8(argL->w8[ 0], nn);
+ res->w8[ 1] = sar8(argL->w8[ 1], nn);
+ res->w8[ 2] = sar8(argL->w8[ 2], nn);
+ res->w8[ 3] = sar8(argL->w8[ 3], nn);
+ res->w8[ 4] = sar8(argL->w8[ 4], nn);
+ res->w8[ 5] = sar8(argL->w8[ 5], nn);
+ res->w8[ 6] = sar8(argL->w8[ 6], nn);
+ res->w8[ 7] = sar8(argL->w8[ 7], nn);
+ res->w8[ 8] = sar8(argL->w8[ 8], nn);
+ res->w8[ 9] = sar8(argL->w8[ 9], nn);
+ res->w8[10] = sar8(argL->w8[10], nn);
+ res->w8[11] = sar8(argL->w8[11], nn);
+ res->w8[12] = sar8(argL->w8[12], nn);
+ res->w8[13] = sar8(argL->w8[13], nn);
+ res->w8[14] = sar8(argL->w8[14], nn);
+ res->w8[15] = sar8(argL->w8[15], nn);
+}
+
/*---------------------------------------------------------------*/
/*--- end host_generic_simd128.c ---*/
/*---------------------------------------------------------------*/
Modified: trunk/priv/host_generic_simd128.h
===================================================================
--- trunk/priv/host_generic_simd128.h 2011-01-11 19:55:39 UTC (rev 2075)
+++ trunk/priv/host_generic_simd128.h 2011-01-17 10:32:18 UTC (rev 2076)
@@ -58,6 +58,8 @@
extern void h_generic_calc_Max8Sx16 ( /*OUT*/V128*, V128*, V128* );
extern void h_generic_calc_Min8Sx16 ( /*OUT*/V128*, V128*, V128* );
extern void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_SarN64x2 ( /*OUT*/V128*, V128*, UInt );
+extern void h_generic_calc_SarN8x16 ( /*OUT*/V128*, V128*, UInt );
#endif /* ndef __VEX_HOST_GENERIC_SIMD128_H */
|