You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
1
(13) |
2
(33) |
3
(25) |
4
(22) |
5
(22) |
6
(21) |
7
(19) |
|
8
(29) |
9
(34) |
10
(29) |
11
(37) |
12
(36) |
13
(28) |
14
(25) |
|
15
(28) |
16
(23) |
17
(36) |
18
(21) |
19
(12) |
20
(14) |
21
(10) |
|
22
(7) |
23
(15) |
24
(41) |
25
(15) |
26
(9) |
27
(7) |
28
(6) |
|
29
(16) |
30
(24) |
31
(22) |
|
|
|
|
|
From: Tom H. <th...@cy...> - 2005-05-11 07:47:38
|
Nightly build on ginetta ( i686, Red Hat 8.0 ) started at 2005-05-11 03:20:02 BST Checking out vex source tree ... done Building vex ... done Checking out valgrind source tree ... done Configuring valgrind ... done Building valgrind ... done Running regression tests ... failed Regression test results follow == 173 tests, 3 stderr failures, 0 stdout failures ================= corecheck/tests/fdleak_cmsg (stderr) none/tests/faultstatus (stderr) none/tests/x86/int (stderr) |
|
From: Tom H. <th...@cy...> - 2005-05-11 07:32:26
|
Nightly build on gill ( x86_64, Fedora Core 2 ) started at 2005-05-11 03:00:02 BST Checking out vex source tree ... done Building vex ... done Checking out valgrind source tree ... done Configuring valgrind ... done Building valgrind ... done Running regression tests ... failed Regression test results follow == 150 tests, 77 stderr failures, 4 stdout failures ================= memcheck/tests/addressable (stderr) memcheck/tests/badaddrvalue (stderr) memcheck/tests/badfree-2trace (stderr) memcheck/tests/badfree (stderr) memcheck/tests/badjump (stderr) memcheck/tests/badjump2 (stderr) memcheck/tests/badloop (stderr) memcheck/tests/badpoll (stderr) memcheck/tests/badrw (stderr) memcheck/tests/brk (stderr) memcheck/tests/brk2 (stderr) memcheck/tests/buflen_check (stderr) memcheck/tests/clientperm (stderr) memcheck/tests/custom_alloc (stderr) memcheck/tests/describe-block (stderr) memcheck/tests/doublefree (stderr) memcheck/tests/error_counts (stdout) memcheck/tests/errs1 (stderr) memcheck/tests/execve (stderr) memcheck/tests/execve2 (stderr) memcheck/tests/exitprog (stderr) memcheck/tests/fprw (stderr) memcheck/tests/fwrite (stderr) memcheck/tests/inits (stderr) memcheck/tests/inline (stderr) memcheck/tests/leak-0 (stderr) memcheck/tests/leak-cycle (stderr) memcheck/tests/leak-regroot (stderr) memcheck/tests/leak-tree (stderr) memcheck/tests/leakotron (stdout) memcheck/tests/malloc1 (stderr) memcheck/tests/malloc2 (stderr) memcheck/tests/malloc3 (stderr) memcheck/tests/manuel1 (stderr) memcheck/tests/manuel2 (stderr) memcheck/tests/manuel3 (stderr) memcheck/tests/match-overrun (stderr) memcheck/tests/memalign2 (stderr) memcheck/tests/memalign_test (stderr) memcheck/tests/memcmptest (stderr) memcheck/tests/mempool (stderr) memcheck/tests/mismatches (stderr) memcheck/tests/mmaptest (stderr) memcheck/tests/nanoleak (stderr) memcheck/tests/nanoleak_supp (stderr) memcheck/tests/new_nothrow (stderr) memcheck/tests/new_override (stderr) memcheck/tests/null_socket (stderr) 
memcheck/tests/overlap (stderr) memcheck/tests/pointer-trace (stderr) memcheck/tests/post-syscall (stderr) memcheck/tests/realloc1 (stderr) memcheck/tests/realloc2 (stderr) memcheck/tests/realloc3 (stderr) memcheck/tests/sigaltstack (stderr) memcheck/tests/signal2 (stderr) memcheck/tests/sigprocmask (stderr) memcheck/tests/str_tester (stderr) memcheck/tests/supp1 (stderr) memcheck/tests/supp2 (stderr) memcheck/tests/suppfree (stderr) memcheck/tests/toobig-allocs (stderr) memcheck/tests/trivialleak (stderr) memcheck/tests/vgtest_ume (stderr) memcheck/tests/weirdioctl (stderr) memcheck/tests/writev (stderr) memcheck/tests/zeropage (stderr) corecheck/tests/fdleak_cmsg (stderr) corecheck/tests/fdleak_creat (stderr) corecheck/tests/fdleak_dup (stderr) corecheck/tests/fdleak_dup2 (stderr) corecheck/tests/fdleak_fcntl (stderr) corecheck/tests/fdleak_ipv4 (stderr) corecheck/tests/fdleak_open (stderr) corecheck/tests/fdleak_pipe (stderr) corecheck/tests/fdleak_socketpair (stderr) massif/tests/toobig-allocs (stderr) none/tests/faultstatus (stderr) none/tests/selfrun (stdout) none/tests/selfrun (stderr) none/tests/yield (stdout) |
|
From: Tom H. <th...@cy...> - 2005-05-11 07:24:09
|
Nightly build on ginetta ( Red Hat 8.0 ) started at 2005-05-11 03:10:02 BST Checking out source tree ... done Configuring ... done Building ... done Running regression tests ... done Last 20 lines of log.verbose follow insn_cmov: valgrind ./insn_cmov insn_fpu: valgrind ./insn_fpu insn_mmx: valgrind ./insn_mmx insn_mmxext: valgrind ./insn_mmxext insn_sse: valgrind ./insn_sse insn_sse2: (skipping, prereq failed: ../../../tests/cputest x86-sse2) int: valgrind ./int pushpopseg: valgrind ./pushpopseg rcl_assert: valgrind ./rcl_assert seg_override: valgrind ./seg_override -- Finished tests in none/tests/x86 ------------------------------------ yield: valgrind ./yield -- Finished tests in none/tests ---------------------------------------- == 205 tests, 3 stderr failures, 0 stdout failures ================= memcheck/tests/pth_once (stderr) memcheck/tests/scalar (stderr) memcheck/tests/threadederrno (stderr) make: *** [regtest] Error 1 |
|
From: Nicholas N. <nj...@cs...> - 2005-05-11 03:11:02
|
Hi, In vg_main.c:process_cmd_line_options(), around line 1856, there's a switch on VG_(clo_log_to). In the File/FileExactly cases, VG_(safe_fd)() is called on the resulting file descriptors. In the Socket case, it's not called. Then around line 1957, 7 lines of code are executed that move fd into a safe range. This looks almost identical to what VG_(safe_fd)() is doing. One difference is that it doesn't close the fd if the DUPFD operation fails. (VG_(safe_fd)() does close it in that case). However, the close() is pretty pointless since VG_(safe_fd)() asserts shortly after in the failure case, whereas the 7 lines don't. So, some questions: - can the VG_(safe_fd)() calls be removed from the switch, since the post-switch 7 lines do basically the same thing? - about VG_(safe_fd)() itself: is it appropriate that if the DUPFD ever fails it will die with an assertion failure? - can we call VG_(safe_fd)() in the post-switch code rather than having those 7 lines? Currently those lines just print out a warning and continue if the DUPFD fails -- perhaps aborting would be better? Thanks. N |
|
From: <js...@ac...> - 2005-05-11 03:04:45
|
Nightly build on phoenix ( SuSE 9.1 ) started at 2005-05-11 03:50:01 BST Checking out source tree ... done Configuring ... done Building ... done Running regression tests ... done Last 20 lines of log.verbose follow insn_mmx: valgrind ./insn_mmx insn_mmxext: (skipping, prereq failed: ../../../tests/cputest x86-mmxext) insn_sse: valgrind ./insn_sse insn_sse2: (skipping, prereq failed: ../../../tests/cputest x86-sse2) int: valgrind ./int pushpopseg: valgrind ./pushpopseg rcl_assert: valgrind ./rcl_assert seg_override: valgrind ./seg_override -- Finished tests in none/tests/x86 ------------------------------------ yield: valgrind ./yield -- Finished tests in none/tests ---------------------------------------- == 201 tests, 5 stderr failures, 0 stdout failures ================= memcheck/tests/pth_once (stderr) memcheck/tests/scalar (stderr) memcheck/tests/threadederrno (stderr) memcheck/tests/writev (stderr) corecheck/tests/fdleak_fcntl (stderr) make: *** [regtest] Error 1 |
|
From: <sv...@va...> - 2005-05-11 02:55:56
|
Author: sewardj
Date: 2005-05-11 03:55:54 +0100 (Wed, 11 May 2005)
New Revision: 1182
Modified:
trunk/priv/guest-amd64/toIR.c
Log:
To a first approximation, this commit completes SSE2 support for AMD64.
Modified: trunk/priv/guest-amd64/toIR.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/toIR.c 2005-05-11 02:55:00 UTC (rev 1181)
+++ trunk/priv/guest-amd64/toIR.c 2005-05-11 02:55:54 UTC (rev 1182)
@@ -8712,15 +8712,19 @@
/* else fall through */
}
=20
+ /* 66 0F 2B =3D MOVNTPD -- for us, just a plain SSE store. */
/* 0F 2B =3D MOVNTPS -- for us, just a plain SSE store. */
- if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
- && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x2B) {
+ if ( ( (haveNo66noF2noF3(pfx) && sz =3D=3D 4)
+ || (have66noF2noF3(pfx) && sz =3D=3D 2)=20
+ )
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x2B) {
modrm =3D getUChar(delta+2);
if (!epartIsReg(modrm)) {
addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
- DIP("movntps %s,%s\n", dis_buf,
- nameXMMReg(gregOfRexRM(pfx,modrm)));
+ DIP("movntp%s %s,%s\n", sz=3D=3D2 ? "d" : "s",
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
delta +=3D 2+alen;
goto decode_success;
}
@@ -9962,7 +9966,6 @@
if (epartIsReg(modrm)) {
delta +=3D 2+1;
if (sz =3D=3D 4) {
- goto decode_failure; /* awaiting test case */
putXMMReg(
gregOfRexRM(pfx,modrm),
unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )=20
@@ -10043,67 +10046,65 @@
goto decode_success;
}
=20
-//.. /* F3 0F 6F =3D MOVDQU -- move from E (mem or xmm) to G (xmm). *=
/
-//.. /* Unfortunately can't simply use the MOVDQA case since the
-//.. prefix lengths are different (66 vs F3) */
-//.. if (insn[0] =3D=3D 0xF3 && insn[1] =3D=3D 0x0F && insn[2] =3D=3D=
0x6F) {
-//.. vassert(sz =3D=3D 4);
-//.. modrm =3D getUChar(delta+3);
-//.. if (epartIsReg(modrm)) {
-//.. putXMMReg( gregOfRM(modrm),=20
-//.. getXMMReg( eregOfRM(modrm) ));
-//.. DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. delta +=3D 3+1;
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+3, dis_buf );
-//.. putXMMReg( gregOfRM(modrm),=20
-//.. loadLE(Ity_V128, mkexpr(addr)) );
-//.. DIP("movdqu %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. delta +=3D 3+alen;
-//.. }
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* F3 0F 7F =3D MOVDQU -- move from G (xmm) to E (mem or xmm). *=
/
-//.. /* Unfortunately can't simply use the MOVDQA case since the
-//.. prefix lengths are different (66 vs F3) */
-//.. if (insn[0] =3D=3D 0xF3 && insn[1] =3D=3D 0x0F && insn[2] =3D=3D=
0x7F) {
-//.. vassert(sz =3D=3D 4);
-//.. modrm =3D getUChar(delta+3);
-//.. if (epartIsReg(modrm)) {
-//.. delta +=3D 3+1;
-//.. putXMMReg( eregOfRM(modrm),
-//.. getXMMReg(gregOfRM(modrm)) );
-//.. DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)),=20
-//.. nameXMMReg(eregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode( &alen, sorb, delta+3, dis_buf );
-//.. delta +=3D 3+alen;
-//.. storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
-//.. DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_bu=
f);
-//.. }
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* F2 0F D6 =3D MOVDQ2Q -- move from E (lo half xmm, not mem) to=
G (mmx). */
-//.. if (insn[0] =3D=3D 0xF2 && insn[1] =3D=3D 0x0F && insn[2] =3D=3D=
0xD6) {
-//.. vassert(sz =3D=3D 4);
-//.. modrm =3D getUChar(delta+3);
-//.. if (epartIsReg(modrm)) {
-//.. do_MMX_preamble();
-//.. putMMXReg( gregOfRM(modrm),=20
-//.. getXMMRegLane64( eregOfRM(modrm), 0 ));
-//.. DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)),
-//.. nameMMXReg(gregOfRM(modrm)));
-//.. delta +=3D 3+1;
-//.. goto decode_success;
-//.. } else {
-//.. /* fall through, apparently no mem case for this insn */
-//.. }
-//.. }
+ /* F3 0F 6F =3D MOVDQU -- move from E (mem or xmm) to G (xmm). */
+ if (haveF3no66noF2(pfx) && sz =3D=3D 4
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x6F) {
+ modrm =3D getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ getXMMReg( eregOfRexRM(pfx,modrm) ));
+ DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("movdqu %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+ goto decode_success;
+ }
=20
+ /* F3 0F 7F =3D MOVDQU -- move from G (xmm) to E (mem or xmm). */
+ if (haveF3no66noF2(pfx) && sz =3D=3D 4
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x7F) {
+ modrm =3D getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ goto decode_failure; /* awaiting test case */
+ delta +=3D 2+1;
+ putXMMReg( eregOfRexRM(pfx,modrm),
+ getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),=20
+ nameXMMReg(eregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode( &alen, pfx, delta+2, dis_buf, 0 );
+ delta +=3D 2+alen;
+ storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_=
buf);
+ }
+ goto decode_success;
+ }
+
+ /* F2 0F D6 =3D MOVDQ2Q -- move from E (lo half xmm, not mem) to G (m=
mx). */
+ if (haveF2no66noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD6) {
+ modrm =3D getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ do_MMX_preamble();
+ putMMXReg( gregLO3ofRM(modrm),=20
+ getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
+ DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ delta +=3D 2+1;
+ goto decode_success;
+ } else {
+ /* apparently no mem case for this insn */
+ goto decode_failure;
+ }
+ }
+
/* 66 0F 16 =3D MOVHPD -- move from mem to high half of XMM. */
/* These seems identical to MOVHPS. This instruction encoding is
completely crazy. */
@@ -10174,46 +10175,50 @@
/* else fall through */
}
=20
-//.. /* 66 0F 50 =3D MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(=
E) to
-//.. 2 lowest bits of ireg(G) */
-//.. if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x50) {
-//.. modrm =3D getUChar(delta+2);
-//.. if (sz =3D=3D 2 && epartIsReg(modrm)) {
-//.. Int src;
-//.. t0 =3D newTemp(Ity_I32);
-//.. t1 =3D newTemp(Ity_I32);
-//.. delta +=3D 2+1;
-//.. src =3D eregOfRM(modrm);
-//.. assign( t0, binop( Iop_And32,
-//.. binop(Iop_Shr32, getXMMRegLane32(src,1)=
, mkU8(31)),
-//.. mkU32(1) ));
-//.. assign( t1, binop( Iop_And32,
-//.. binop(Iop_Shr32, getXMMRegLane32(src,3)=
, mkU8(30)),
-//.. mkU32(2) ));
-//.. putIReg(4, gregOfRM(modrm),
-//.. binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
-//.. );
-//.. DIP("movmskpd %s,%s\n", nameXMMReg(src),=20
-//.. nameIReg(4, gregOfRM(modrm)));
-//.. goto decode_success;
-//.. }
-//.. /* else fall through */
-//.. }
-//..=20
-//.. /* 66 0F E7 =3D MOVNTDQ -- for us, just a plain SSE store. */
-//.. if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE7) {
-//.. modrm =3D getUChar(delta+2);
-//.. if (sz =3D=3D 2 && !epartIsReg(modrm)) {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
-//.. DIP("movntdq %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. delta +=3D 2+alen;
-//.. goto decode_success;
-//.. }
-//.. /* else fall through */
-//.. }
+ /* 66 0F 50 =3D MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
+ 2 lowest bits of ireg(G) */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x50) {
+ modrm =3D getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ Int src;
+ t0 =3D newTemp(Ity_I32);
+ t1 =3D newTemp(Ity_I32);
+ delta +=3D 2+1;
+ src =3D eregOfRexRM(pfx,modrm);
+ assign( t0, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,1), mkU=
8(31)),
+ mkU32(1) ));
+ assign( t1, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,3), mkU=
8(30)),
+ mkU32(2) ));
+ putIReg32( gregOfRexRM(pfx,modrm),
+ binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
+ );
+ DIP("movmskpd %s,%s\n", nameXMMReg(src),=20
+ nameIReg32(gregOfRexRM(pfx,modrm)));
+ goto decode_success;
+ }
+ /* else fall through */
+ goto decode_failure;
+ }
=20
+ /* 66 0F E7 =3D MOVNTDQ -- for us, just a plain SSE store. */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE7) {
+ modrm =3D getUChar(delta+2);
+ if (!epartIsReg(modrm)) {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("movntdq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ goto decode_success;
+ }
+ /* else fall through */
+ goto decode_failure;
+ }
+
/* 0F C3 =3D MOVNTI -- for us, just a plain ireg store. */
if (haveNo66noF2noF3(pfx) &&
insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC3) {
@@ -10247,34 +10252,36 @@
}
}
=20
-//.. /* F3 0F D6 =3D MOVQ2DQ -- move from E (mmx) to G (lo half xmm, =
zero
-//.. hi half). */
-//.. if (insn[0] =3D=3D 0xF3 && insn[1] =3D=3D 0x0F && insn[2] =3D=3D=
0xD6) {
-//.. vassert(sz =3D=3D 4);
-//.. modrm =3D getUChar(delta+3);
-//.. if (epartIsReg(modrm)) {
-//.. do_MMX_preamble();
-//.. putXMMReg( gregOfRM(modrm),=20
-//.. unop(Iop_64Uto128, getMMXReg( eregOfRM(modrm) )=
) );
-//.. DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. delta +=3D 3+1;
-//.. goto decode_success;
-//.. } else {
-//.. /* fall through, apparently no mem case for this insn */
-//.. }
-//.. }
-//..=20
-//.. /* F3 0F 7E =3D MOVQ -- move 64 bits from E (mem or lo half xmm)=
to
-//.. G (lo half xmm). If E is mem, upper half of G is zeroed out.=
*/
+ /* F3 0F D6 =3D MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
+ hi half). */
+ if (haveF3no66noF2(pfx) && sz =3D=3D 4
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD6) {
+ modrm =3D getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ do_MMX_preamble();
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) ))=
);
+ DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ goto decode_success;
+ } else {
+ /* apparently no mem case for this insn */
+ goto decode_failure;
+ }
+ }
+
+ /* F3 0F 7E =3D MOVQ -- move 64 bits from E (mem or lo half xmm) to
+ G (lo half xmm). If E is mem, upper half of G is zeroed out. */
/* F2 0F 10 =3D MOVSD -- move 64 bits from E (mem or lo half xmm) to
G (lo half xmm). If E is mem, upper half of G is zeroed out.
(original defn) */
- if ( (haveF2no66noF3(pfx) && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x=
10)
+ if ( (haveF2no66noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x10)
||=20
- (haveF3no66noF2(pfx) && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x=
7E)
+ (haveF3no66noF2(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x7E)
) {
- vassert(sz =3D=3D 4);
modrm =3D getUChar(delta+2);
if (epartIsReg(modrm)) {
putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
|
|
From: <sv...@va...> - 2005-05-11 02:55:03
|
Author: sewardj
Date: 2005-05-11 03:55:00 +0100 (Wed, 11 May 2005)
New Revision: 1181
Modified:
trunk/priv/guest-x86/toIR.c
Log:
Comment-only change.
Modified: trunk/priv/guest-x86/toIR.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-x86/toIR.c 2005-05-11 02:13:42 UTC (rev 1180)
+++ trunk/priv/guest-x86/toIR.c 2005-05-11 02:55:00 UTC (rev 1181)
@@ -7765,13 +7765,15 @@
}
=20
/* 0F 2B =3D MOVNTPS -- for us, just a plain SSE store. */
+ /* 66 0F 2B =3D MOVNTPD -- for us, just a plain SSE store. */
if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x2B) {
modrm =3D getIByte(delta+2);
if (!epartIsReg(modrm)) {
addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
- DIP("movntps %s,%s\n", dis_buf,
- nameXMMReg(gregOfRM(modrm)));
+ DIP("movntp%s %s,%s\n", sz=3D=3D2 ? "d" : "s",
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
delta +=3D 2+alen;
goto decode_success;
}
|
|
From: Tom H. <to...@co...> - 2005-05-11 02:36:02
|
Nightly build on dunsmere ( athlon, Fedora Core 3 ) started at 2005-05-11 03:30:03 BST Checking out vex source tree ... done Building vex ... done Checking out valgrind source tree ... done Configuring valgrind ... done Building valgrind ... done Running regression tests ... failed Regression test results follow == 175 tests, 167 stderr failures, 1 stdout failure ================= memcheck/tests/addressable (stderr) memcheck/tests/badaddrvalue (stderr) memcheck/tests/badfree-2trace (stderr) memcheck/tests/badfree (stderr) memcheck/tests/badjump (stderr) memcheck/tests/badjump2 (stderr) memcheck/tests/badloop (stderr) memcheck/tests/badpoll (stderr) memcheck/tests/badrw (stderr) memcheck/tests/brk (stderr) memcheck/tests/brk2 (stderr) memcheck/tests/buflen_check (stderr) memcheck/tests/clientperm (stderr) memcheck/tests/custom_alloc (stderr) memcheck/tests/describe-block (stderr) memcheck/tests/doublefree (stderr) memcheck/tests/errs1 (stderr) memcheck/tests/execve (stderr) memcheck/tests/execve2 (stderr) memcheck/tests/exitprog (stderr) memcheck/tests/fprw (stderr) memcheck/tests/fwrite (stderr) memcheck/tests/inits (stderr) memcheck/tests/inline (stderr) memcheck/tests/leak-0 (stderr) memcheck/tests/leak-cycle (stderr) memcheck/tests/leak-regroot (stderr) memcheck/tests/leak-tree (stderr) memcheck/tests/malloc1 (stderr) memcheck/tests/malloc2 (stderr) memcheck/tests/malloc3 (stderr) memcheck/tests/manuel1 (stderr) memcheck/tests/manuel2 (stderr) memcheck/tests/manuel3 (stderr) memcheck/tests/match-overrun (stderr) memcheck/tests/memalign2 (stderr) memcheck/tests/memalign_test (stderr) memcheck/tests/memcmptest (stderr) memcheck/tests/mempool (stderr) memcheck/tests/mismatches (stderr) memcheck/tests/mmaptest (stderr) memcheck/tests/nanoleak (stderr) memcheck/tests/nanoleak_supp (stderr) memcheck/tests/new_nothrow (stderr) memcheck/tests/new_override (stderr) memcheck/tests/null_socket (stderr) memcheck/tests/overlap (stderr) memcheck/tests/pointer-trace (stderr) 
memcheck/tests/post-syscall (stderr) memcheck/tests/realloc1 (stderr) memcheck/tests/realloc2 (stderr) memcheck/tests/realloc3 (stderr) memcheck/tests/sigaltstack (stderr) memcheck/tests/signal2 (stderr) memcheck/tests/sigprocmask (stderr) memcheck/tests/str_tester (stderr) memcheck/tests/supp1 (stderr) memcheck/tests/supp2 (stderr) memcheck/tests/suppfree (stderr) memcheck/tests/toobig-allocs (stderr) memcheck/tests/trivialleak (stderr) memcheck/tests/vgtest_ume (stderr) memcheck/tests/weirdioctl (stderr) memcheck/tests/writev (stderr) memcheck/tests/x86/fpeflags (stderr) memcheck/tests/x86/pushfpopf (stderr) memcheck/tests/x86/scalar (stderr) memcheck/tests/x86/scalar_exit_group (stderr) memcheck/tests/x86/scalar_fork (stderr) memcheck/tests/x86/scalar_supp (stderr) memcheck/tests/x86/scalar_vfork (stderr) memcheck/tests/x86/tronical (stderr) memcheck/tests/zeropage (stderr) cachegrind/tests/chdir (stderr) cachegrind/tests/dlclose (stderr) cachegrind/tests/x86/fpu-28-108 (stderr) corecheck/tests/as_mmap (stderr) corecheck/tests/as_shm (stderr) corecheck/tests/erringfds (stderr) corecheck/tests/fdleak_cmsg (stderr) corecheck/tests/fdleak_creat (stderr) corecheck/tests/fdleak_dup (stderr) corecheck/tests/fdleak_dup2 (stderr) corecheck/tests/fdleak_fcntl (stderr) corecheck/tests/fdleak_ipv4 (stderr) corecheck/tests/fdleak_open (stderr) corecheck/tests/fdleak_pipe (stderr) corecheck/tests/fdleak_socketpair (stderr) corecheck/tests/pth_atfork1 (stderr) corecheck/tests/pth_cancel1 (stderr) corecheck/tests/pth_cancel2 (stderr) corecheck/tests/pth_cvsimple (stderr) corecheck/tests/pth_empty (stderr) corecheck/tests/pth_exit (stderr) corecheck/tests/pth_exit2 (stderr) corecheck/tests/pth_mutexspeed (stderr) corecheck/tests/pth_once (stderr) corecheck/tests/pth_rwlock (stderr) corecheck/tests/res_search (stderr) corecheck/tests/sigkill (stderr) corecheck/tests/threadederrno (stderr) corecheck/tests/vgprintf (stderr) massif/tests/toobig-allocs (stderr) 
massif/tests/true_html (stderr) massif/tests/true_text (stderr) lackey/tests/true (stderr) none/tests/args (stderr) none/tests/async-sigs (stderr) none/tests/bitfield1 (stderr) none/tests/blockfault (stderr) none/tests/closeall (stderr) none/tests/coolo_sigaction (stderr) none/tests/coolo_strlen (stderr) none/tests/discard (stderr) none/tests/exec-sigmask (stderr) none/tests/execve (stderr) none/tests/faultstatus (stderr) none/tests/fcntl_setown (stderr) none/tests/floored (stderr) none/tests/fork (stderr) none/tests/fucomip (stderr) none/tests/gxx304 (stderr) none/tests/manythreads (stderr) none/tests/map_unaligned (stderr) none/tests/map_unmap (stderr) none/tests/mq (stderr) none/tests/mremap (stderr) none/tests/munmap_exe (stderr) none/tests/pending (stderr) none/tests/pth_blockedsig (stderr) none/tests/pth_stackalign (stderr) none/tests/rcrl (stderr) none/tests/readline1 (stderr) none/tests/resolv (stderr) none/tests/rlimit_nofile (stderr) none/tests/selfrun (stdout) none/tests/selfrun (stderr) none/tests/sem (stderr) none/tests/semlimit (stderr) none/tests/sha1_test (stderr) none/tests/shortpush (stderr) none/tests/shorts (stderr) none/tests/sigstackgrowth (stderr) none/tests/smc1 (stderr) none/tests/stackgrowth (stderr) none/tests/syscall-restart1 (stderr) none/tests/syscall-restart2 (stderr) none/tests/system (stderr) none/tests/thread-exits (stderr) none/tests/threaded-fork (stderr) none/tests/tls (stderr) none/tests/x86/badseg (stderr) none/tests/x86/bt_everything (stderr) none/tests/x86/bt_literal (stderr) none/tests/x86/cpuid (stderr) none/tests/x86/fpu_lazy_eflags (stderr) none/tests/x86/getseg (stderr) none/tests/x86/insn_basic (stderr) none/tests/x86/insn_cmov (stderr) none/tests/x86/insn_fpu (stderr) none/tests/x86/insn_mmx (stderr) none/tests/x86/insn_mmxext (stderr) none/tests/x86/insn_sse (stderr) none/tests/x86/int (stderr) none/tests/x86/pushpopseg (stderr) none/tests/x86/seg_override (stderr) none/tests/x86/sigcontext (stderr) none/tests/yield 
(stderr) |
|
From: Tom H. <to...@co...> - 2005-05-11 02:26:49
|
Nightly build on dunsmere ( Fedora Core 3 ) started at 2005-05-11 03:20:04 BST Checking out source tree ... done Configuring ... done Building ... done Running regression tests ... done Last 20 lines of log.verbose follow insn_mmx: valgrind ./insn_mmx insn_mmxext: valgrind ./insn_mmxext insn_sse: valgrind ./insn_sse insn_sse2: (skipping, prereq failed: ../../../tests/cputest x86-sse2) int: valgrind ./int sh: line 1: 4948 Segmentation fault VALGRINDLIB=/tmp/valgrind.11706/valgrind/.in_place /tmp/valgrind.11706/valgrind/./coregrind/valgrind --command-line-only=yes --memcheck:leak-check=no --addrcheck:leak-check=no --tool=none ./int >int.stdout.out 2>int.stderr.out pushpopseg: valgrind ./pushpopseg rcl_assert: valgrind ./rcl_assert seg_override: valgrind ./seg_override -- Finished tests in none/tests/x86 ------------------------------------ yield: valgrind ./yield -- Finished tests in none/tests ---------------------------------------- == 207 tests, 4 stderr failures, 0 stdout failures ================= memcheck/tests/execve (stderr) memcheck/tests/execve2 (stderr) memcheck/tests/scalar (stderr) memcheck/tests/scalar_supp (stderr) make: *** [regtest] Error 1 |
|
From: <sv...@va...> - 2005-05-11 02:13:45
|
Author: sewardj
Date: 2005-05-11 03:13:42 +0100 (Wed, 11 May 2005)
New Revision: 1180
Modified:
trunk/priv/guest-amd64/toIR.c
trunk/priv/host-amd64/hdefs.c
trunk/priv/host-amd64/hdefs.h
trunk/priv/host-amd64/isel.c
Log:
Lots more SSE2 instructions.
Modified: trunk/priv/guest-amd64/toIR.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/toIR.c 2005-05-11 00:03:06 UTC (rev 1179)
+++ trunk/priv/guest-amd64/toIR.c 2005-05-11 02:13:42 UTC (rev 1180)
@@ -9260,21 +9260,14 @@
goto decode_success;
}
=20
-//.. /* ---------------------------------------------------- */
-//.. /* --- end of the SSE decoder. --- */
-//.. /* ---------------------------------------------------- */
-//..=20
-//.. /* ---------------------------------------------------- */
-//.. /* --- start of the SSE2 decoder. --- */
-//.. /* ---------------------------------------------------- */
-//..=20
-//.. /* Skip parts of the decoder which don't apply given the stated
-//.. guest subarchitecture. */
-//.. if (subarch =3D=3D VexSubArchX86_sse0 || subarch =3D=3D VexSubAr=
chX86_sse1)
-//.. goto after_sse_decoders;
-//..=20
-//.. insn =3D (UChar*)&guest_code[delta];
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE decoder. --- */
+ /* ---------------------------------------------------- */
=20
+ /* ---------------------------------------------------- */
+ /* --- start of the SSE2 decoder. --- */
+ /* ---------------------------------------------------- */
+
/* 66 0F 58 =3D ADDPD -- add 32Fx4 from R/M to R */
if (have66noF2noF3(pfx) && sz =3D=3D 2=20
&& insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x58) {
@@ -9388,67 +9381,79 @@
goto decode_success;
}
=20
-//.. /* 0F 5B =3D CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 i=
n
-//.. xmm(G) */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5B) {
-//.. IRTemp argV =3D newTemp(Ity_V128);
-//.. IRTemp rmode =3D newTemp(Ity_I32);
-//..=20
-//.. modrm =3D getUChar(delta+2);
-//.. if (epartIsReg(modrm)) {
-//.. assign( argV, getXMMReg(eregOfRM(modrm)) );
-//.. delta +=3D 2+1;
-//.. DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. delta +=3D 2+alen;
-//.. DIP("cvtdq2ps %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)) );
-//.. }
-//.. =20
-//.. assign( rmode, get_sse_roundingmode() );
-//.. breakup128to32s( argV, &t3, &t2, &t1, &t0 );
-//..=20
-#if 0 /* stop gcc multi-line comment warning */
-/.. # define CVT(_t) binop( Iop_F64toF32, \
-/.. mkexpr(rmode), \
-/.. unop(Iop_I32toF64,mkexpr(_t)))
-#endif /* stop gcc multi-line comment warning */
-//.. =20
-//.. putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) );
-//.. putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) );
-//.. putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
-//.. putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
-//..=20
-//.. # undef CVT
-//..=20
-//.. goto decode_success;
-//.. }
-
- /* F2 0F E6 =3D CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
- lo half xmm(G), and zero upper half */
- if (haveF2no66noF3(pfx) && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE6=
) {
+ /* 0F 5B =3D CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
+ xmm(G) */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5B) {
IRTemp argV =3D newTemp(Ity_V128);
IRTemp rmode =3D newTemp(Ity_I32);
- if (sz !=3D 4) goto decode_failure;
=20
modrm =3D getUChar(delta+2);
if (epartIsReg(modrm)) {
assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
delta +=3D 2+1;
- DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
nameXMMReg(gregOfRexRM(pfx,modrm)));
} else {
addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
- assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
delta +=3D 2+alen;
- DIP("cvtpd2dq %s,%s\n", dis_buf,
+ DIP("cvtdq2ps %s,%s\n", dis_buf,
nameXMMReg(gregOfRexRM(pfx,modrm)) );
}
=20
assign( rmode, get_sse_roundingmode() );
+ breakup128to32s( argV, &t3, &t2, &t1, &t0 );
+
+# define CVT(_t) binop( Iop_F64toF32, \
+ mkexpr(rmode), \
+ unop(Iop_I32toF64,mkexpr(_t)))
+ =20
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 3, CVT(t3) );
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 2, CVT(t2) );
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* 66 0F E6 =3D CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
+ lo half xmm(G), and zero upper half, rounding towards zero */
+ /* F2 0F E6 =3D CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
+ lo half xmm(G), according to prevailing rounding mode, and zero
+ upper half */
+ if ( ( (haveF2no66noF3(pfx) && sz =3D=3D 4)
+ || (have66noF2noF3(pfx) && sz =3D=3D 2)
+ )
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE6) {
+ IRTemp argV =3D newTemp(Ity_V128);
+ IRTemp rmode =3D newTemp(Ity_I32);
+ Bool r2zero =3D toBool(sz =3D=3D 2);
+
+ modrm =3D getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta +=3D 2+1;
+ DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta +=3D 2+alen;
+ DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+ =20
+ if (r2zero) {
+ assign(rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
t0 =3D newTemp(Ity_F64);
t1 =3D newTemp(Ity_F64);
assign( t0, unop(Iop_ReinterpI64asF64,=20
@@ -9470,213 +9475,229 @@
goto decode_success;
}
=20
-//.. /* 66 0F 2D =3D CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
-//.. I32 in mmx, according to prevailing SSE rounding mode */
-//.. /* 66 0F 2C =3D CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
-//.. I32 in mmx, rounding towards zero */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x2D |=
| insn[1] =3D=3D 0x2C)) {
-//.. IRTemp dst64 =3D newTemp(Ity_I64);
-//.. IRTemp rmode =3D newTemp(Ity_I32);
-//.. IRTemp f64lo =3D newTemp(Ity_F64);
-//.. IRTemp f64hi =3D newTemp(Ity_F64);
-//.. Bool r2zero =3D insn[1] =3D=3D 0x2C;
-//..=20
-//.. do_MMX_preamble();
-//.. modrm =3D getUChar(delta+2);
-//..=20
-//.. if (epartIsReg(modrm)) {
-//.. delta +=3D 2+1;
-//.. assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
-//.. assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
-//.. DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
-//.. nameXMMReg(eregOfRM(modrm)),
-//.. nameMMXReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
-//.. assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32,=20
-//.. mkexpr(addr),=20
-//.. mkU32(8) )));
-//.. delta +=3D 2+alen;
-//.. DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
-//.. dis_buf,
-//.. nameMMXReg(gregOfRM(modrm)));
-//.. }
-//..=20
-//.. if (r2zero) {
-//.. assign(rmode, mkU32((UInt)Irrm_ZERO) );
-//.. } else {
-//.. assign( rmode, get_sse_roundingmode() );
-//.. }
-//..=20
-//.. assign(=20
-//.. dst64,
-//.. binop( Iop_32HLto64,
-//.. binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64hi) )=
,
-//.. binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64lo) )
-//.. )
-//.. );
-//..=20
-//.. putMMXReg(gregOfRM(modrm), mkexpr(dst64));
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 5A =3D CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F3=
2 in
-//.. lo half xmm(G), and zero upper half */
-//.. /* Note, this is practically identical to CVTPD2DQ. It would ha=
ve
-//.. been nicer to merge them together, but the insn[] offsets dif=
fer
-//.. by one. */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5A) {
-//.. IRTemp argV =3D newTemp(Ity_V128);
-//.. IRTemp rmode =3D newTemp(Ity_I32);
-//..=20
-//.. modrm =3D getUChar(delta+2);
-//.. if (epartIsReg(modrm)) {
-//.. assign( argV, getXMMReg(eregOfRM(modrm)) );
-//.. delta +=3D 2+1;
-//.. DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. delta +=3D 2+alen;
-//.. DIP("cvtpd2ps %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)) );
-//.. }
-//.. =20
-//.. assign( rmode, get_sse_roundingmode() );
-//.. t0 =3D newTemp(Ity_F64);
-//.. t1 =3D newTemp(Ity_F64);
-//.. assign( t0, unop(Iop_ReinterpI64asF64,=20
-//.. unop(Iop_128to64, mkexpr(argV))) );
-//.. assign( t1, unop(Iop_ReinterpI64asF64,=20
-//.. unop(Iop_128HIto64, mkexpr(argV))) );
-//.. =20
-#if 0 /* stop gcc multi-line comment warning */
-/.. # define CVT(_t) binop( Iop_F64toF32, \
-/.. mkexpr(rmode), \
-/.. mkexpr(_t) )
-#endif /* stop gcc multi-line comment warning */
-//.. =20
-//.. putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
-//.. putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
-//.. putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
-//.. putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
-//..=20
-//.. # undef CVT
-//..=20
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 2A =3D CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F6=
4 in
-//.. xmm(G) */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x2A) {
-//.. IRTemp arg64 =3D newTemp(Ity_I64);
-//..=20
-//.. modrm =3D getUChar(delta+2);
-//.. do_MMX_preamble();
-//.. if (epartIsReg(modrm)) {
-//.. assign( arg64, getMMXReg(eregOfRM(modrm)) );
-//.. delta +=3D 2+1;
-//.. DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
-//.. delta +=3D 2+alen;
-//.. DIP("cvtpi2pd %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)) );
-//.. }
-//..=20
-//.. putXMMRegLane64F(=20
-//.. gregOfRM(modrm), 0,
-//.. unop(Iop_I32toF64, unop(Iop_64to32, mkexpr(arg64)) )
-//.. );
-//..=20
-//.. putXMMRegLane64F(=20
-//.. gregOfRM(modrm), 1,
-//.. unop(Iop_I32toF64, unop(Iop_64HIto32, mkexpr(arg64)) )
-//.. );
-//..=20
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 5B =3D CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I3=
2 in
-//.. xmm(G) */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5B) {
-//.. IRTemp argV =3D newTemp(Ity_V128);
-//.. IRTemp rmode =3D newTemp(Ity_I32);
-//..=20
-//.. modrm =3D getUChar(delta+2);
-//.. if (epartIsReg(modrm)) {
-//.. assign( argV, getXMMReg(eregOfRM(modrm)) );
-//.. delta +=3D 2+1;
-//.. DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. delta +=3D 2+alen;
-//.. DIP("cvtps2dq %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)) );
-//.. }
-//.. =20
-//.. assign( rmode, get_sse_roundingmode() );
-//.. breakup128to32s( argV, &t3, &t2, &t1, &t0 );
-//..=20
-//.. /* This is less than ideal. If it turns out to be a performa=
nce
-//.. bottleneck it can be improved. */
-#if 0 /* stop gcc multi-line comment warning */
-/.. # define CVT(_t) \
-/.. binop( Iop_F64toI32, \
-/.. mkexpr(rmode), \
-/.. unop( Iop_F32toF64, \
-/.. unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
-#endif /* stop gcc multi-line comment warning */
-//.. =20
-//.. putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
-//.. putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
-//.. putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
-//.. putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
-//..=20
-//.. # undef CVT
-//..=20
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 0F 5A =3D CVTPS2PD -- convert 2 x F32 in low half mem/xmm to =
2 x
-//.. F64 in xmm(G). */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5A) {
-//.. IRTemp f32lo =3D newTemp(Ity_F32);
-//.. IRTemp f32hi =3D newTemp(Ity_F32);
-//..=20
-//.. modrm =3D getUChar(delta+2);
-//.. if (epartIsReg(modrm)) {
-//.. assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) );
-//.. assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) );
-//.. delta +=3D 2+1;
-//.. DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
-//.. assign( f32hi, loadLE(Ity_F32,=20
-//.. binop(Iop_Add32,mkexpr(addr),mkU32(4=
))) );
-//.. delta +=3D 2+alen;
-//.. DIP("cvtps2pd %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)) );
-//.. }
-//..=20
-//.. putXMMRegLane64F( gregOfRM(modrm), 1,
-//.. unop(Iop_F32toF64, mkexpr(f32hi)) );
-//.. putXMMRegLane64F( gregOfRM(modrm), 0,
-//.. unop(Iop_F32toF64, mkexpr(f32lo)) );
-//..=20
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* F2 0F 2D =3D CVTSD2SI -- convert F64 in mem/low half xmm to
-//.. I32 in ireg, according to prevailing SSE rounding mode */
+ /* 66 0F 2D =3D CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
+ I32 in mmx, according to prevailing SSE rounding mode */
+ /* 66 0F 2C =3D CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
+ I32 in mmx, rounding towards zero */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x2D || insn[1] =3D=3D =
0x2C)) {
+ IRTemp dst64 =3D newTemp(Ity_I64);
+ IRTemp rmode =3D newTemp(Ity_I32);
+ IRTemp f64lo =3D newTemp(Ity_F64);
+ IRTemp f64hi =3D newTemp(Ity_F64);
+ Bool r2zero =3D insn[1] =3D=3D 0x2C;
+
+ do_MMX_preamble();
+ modrm =3D getUChar(delta+2);
+
+ if (epartIsReg(modrm)) {
+ delta +=3D 2+1;
+ assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
+ assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
+ DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
+ assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,=20
+ mkexpr(addr),=20
+ mkU64(8) )));
+ delta +=3D 2+alen;
+ DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ if (r2zero) {
+ assign(rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ assign(=20
+ dst64,
+ binop( Iop_32HLto64,
+ binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64hi) ),
+ binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64lo) )
+ )
+ );
+
+ putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
+ goto decode_success;
+ }
+
+ /* 66 0F 5A =3D CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
+ lo half xmm(G), rounding according to prevailing SSE rounding
+ mode, and zero upper half */
+ /* Note, this is practically identical to CVTPD2DQ. It would have
+ been nicer to merge them together, but the insn[] offsets differ
+ by one. */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5A) {
+ IRTemp argV =3D newTemp(Ity_V128);
+ IRTemp rmode =3D newTemp(Ity_I32);
+
+ modrm =3D getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta +=3D 2+1;
+ DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta +=3D 2+alen;
+ DIP("cvtpd2ps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+ =20
+ assign( rmode, get_sse_roundingmode() );
+ t0 =3D newTemp(Ity_F64);
+ t1 =3D newTemp(Ity_F64);
+ assign( t0, unop(Iop_ReinterpI64asF64,=20
+ unop(Iop_V128to64, mkexpr(argV))) );
+ assign( t1, unop(Iop_ReinterpI64asF64,=20
+ unop(Iop_V128HIto64, mkexpr(argV))) );
+ =20
+# define CVT(_t) binop( Iop_F64toF32, \
+ mkexpr(rmode), \
+ mkexpr(_t) )
+ =20
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) );
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* 66 0F 2A =3D CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
+ xmm(G) */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x2A) {
+ IRTemp arg64 =3D newTemp(Ity_I64);
+
+ modrm =3D getUChar(delta+2);
+ do_MMX_preamble();
+ if (epartIsReg(modrm)) {
+ assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
+ delta +=3D 2+1;
+ DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
+ delta +=3D 2+alen;
+ DIP("cvtpi2pd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+
+ putXMMRegLane64F(=20
+ gregOfRexRM(pfx,modrm), 0,
+ unop(Iop_I32toF64, unop(Iop_64to32, mkexpr(arg64)) )
+ );
+
+ putXMMRegLane64F(=20
+ gregOfRexRM(pfx,modrm), 1,
+ unop(Iop_I32toF64, unop(Iop_64HIto32, mkexpr(arg64)) )
+ );
+
+ goto decode_success;
+ }
+
+ /* F3 0F 5B =3D CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
+ xmm(G), rounding towards zero */
+ /* 66 0F 5B =3D CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
+ xmm(G), as per the prevailing rounding mode */
+ if ( ( (have66noF2noF3(pfx) && sz =3D=3D 2)
+ || (haveF3no66noF2(pfx) && sz =3D=3D 4)
+ )
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5B) {
+ IRTemp argV =3D newTemp(Ity_V128);
+ IRTemp rmode =3D newTemp(Ity_I32);
+ Bool r2zero =3D toBool(sz =3D=3D 4);
+
+ modrm =3D getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta +=3D 2+1;
+ DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta +=3D 2+alen;
+ DIP("cvtps2dq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+ =20
+ if (r2zero) {
+ assign( rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ breakup128to32s( argV, &t3, &t2, &t1, &t0 );
+
+ /* This is less than ideal. If it turns out to be a performance
+ bottleneck it can be improved. */
+# define CVT(_t) \
+ binop( Iop_F64toI32, \
+ mkexpr(rmode), \
+ unop( Iop_F32toF64, \
+ unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
+ =20
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, CVT(t3) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, CVT(t2) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* 0F 5A =3D CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
+ F64 in xmm(G). */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5A) {
+ IRTemp f32lo =3D newTemp(Ity_F32);
+ IRTemp f32hi =3D newTemp(Ity_F32);
+
+ modrm =3D getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0) );
+ assign( f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1) );
+ delta +=3D 2+1;
+ DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
+ assign( f32hi, loadLE(Ity_F32,=20
+ binop(Iop_Add64,mkexpr(addr),mkU64(4))) )=
;
+ delta +=3D 2+alen;
+ DIP("cvtps2pd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+
+ putXMMRegLane64F( gregOfRexRM(pfx,modrm), 1,
+ unop(Iop_F32toF64, mkexpr(f32hi)) );
+ putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
+ unop(Iop_F32toF64, mkexpr(f32lo)) );
+
+ goto decode_success;
+ }
+
+ /* F2 0F 2D =3D CVTSD2SI=20
+ when sz=3D=3D4 -- convert F64 in mem/low half xmm to I32 in ireg,=20
+ according to prevailing SSE rounding mode
+ when sz=3D=3D8 -- convert F64 in mem/low half xmm to I64 in ireg,=20
+ according to prevailing SSE rounding mode
+ */
/* F2 0F 2C =3D CVTTSD2SI=20
when sz=3D=3D4 -- convert F64 in mem/low half xmm to I32 in ireg,=20
truncating towards zero
@@ -9685,7 +9706,7 @@
*/
if (haveF2no66noF3(pfx)=20
&& insn[0] =3D=3D 0x0F=20
- && ( /* insn[1] =3D=3D 0x2D || */ insn[1] =3D=3D 0x2C)) {
+ && (insn[1] =3D=3D 0x2D || insn[1] =3D=3D 0x2C)) {
IRTemp rmode =3D newTemp(Ity_I32);
IRTemp f64lo =3D newTemp(Ity_F64);
Bool r2zero =3D toBool(insn[1] =3D=3D 0x2C);
@@ -9835,100 +9856,12 @@
goto decode_success;
}
=20
-//.. /* 66 0F E6 =3D CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I=
32 in
-//.. lo half xmm(G), and zero upper half, rounding towards zero */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE6) {
-//.. IRTemp argV =3D newTemp(Ity_V128);
-//.. IRTemp rmode =3D newTemp(Ity_I32);
-//..=20
-//.. modrm =3D getUChar(delta+2);
-//.. if (epartIsReg(modrm)) {
-//.. assign( argV, getXMMReg(eregOfRM(modrm)) );
-//.. delta +=3D 2+1;
-//.. DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. delta +=3D 2+alen;
-//.. DIP("cvttpd2dq %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)) );
-//.. }
-//..=20
-//.. assign( rmode, mkU32((UInt)Irrm_ZERO) );
-//..=20
-//.. t0 =3D newTemp(Ity_F64);
-//.. t1 =3D newTemp(Ity_F64);
-//.. assign( t0, unop(Iop_ReinterpI64asF64,=20
-//.. unop(Iop_128to64, mkexpr(argV))) );
-//.. assign( t1, unop(Iop_ReinterpI64asF64,=20
-//.. unop(Iop_128HIto64, mkexpr(argV))) );
-//.. =20
-#if 0 /* stop gcc multi-line comment warning */
-/.. # define CVT(_t) binop( Iop_F64toI32, \
-/.. mkexpr(rmode), \
-/.. mkexpr(_t) )
-#endif /* stop gcc multi-line comment warning */
-//.. =20
-//.. putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
-//.. putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
-//.. putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
-//.. putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
-//..=20
-//.. # undef CVT
-//..=20
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* F3 0F 5B =3D CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I=
32 in
-//.. xmm(G), rounding towards zero */
-//.. if (insn[0] =3D=3D 0xF3 && insn[1] =3D=3D 0x0F && insn[2] =3D=3D=
0x5B) {
-//.. IRTemp argV =3D newTemp(Ity_V128);
-//.. IRTemp rmode =3D newTemp(Ity_I32);
-//.. vassert(sz =3D=3D 4);
-//..=20
-//.. modrm =3D getUChar(delta+3);
-//.. if (epartIsReg(modrm)) {
-//.. assign( argV, getXMMReg(eregOfRM(modrm)) );
-//.. delta +=3D 3+1;
-//.. DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+3, dis_buf );
-//.. assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. delta +=3D 3+alen;
-//.. DIP("cvttps2dq %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)) );
-//.. }
-//.. =20
-//.. assign( rmode, mkU32((UInt)Irrm_ZERO) );
-//.. breakup128to32s( argV, &t3, &t2, &t1, &t0 );
-//..=20
-//.. /* This is less than ideal. If it turns out to be a performa=
nce
-//.. bottleneck it can be improved. */
-#if 0 /* stop gcc multi-line comment warning */
-/.. # define CVT(_t) \
-/.. binop( Iop_F64toI32, \
-/.. mkexpr(rmode), \
-/.. unop( Iop_F32toF64, \
-/.. unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
-#endif /* stop gcc multi-line comment warning */
-//.. =20
-//.. putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
-//.. putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
-//.. putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
-//.. putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
-//..=20
-//.. # undef CVT
-//..=20
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 5E =3D DIVPD -- div 64Fx2 from R/M to R */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5E) {
-//.. delta =3D dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div=
64Fx2 );
-//.. goto decode_success;
-//.. }
+ /* 66 0F 5E =3D DIVPD -- div 64Fx2 from R/M to R */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5E) {
+ delta =3D dis_SSE_E_to_G_all( pfx, delta+2, "divpd", Iop_Div64Fx2 =
);
+ goto decode_success;
+ }
=20
/* F2 0F 5E =3D DIVSD -- div 64F0x2 from R/M to R */
if (haveF2no66noF3(pfx) && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5E=
) {
@@ -9937,26 +9870,27 @@
goto decode_success;
}
=20
-//.. /* 0F AE /5 =3D LFENCE -- flush pending operations to memory */
-//.. /* 0F AE /6 =3D MFENCE -- flush pending operations to memory */
-//.. if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xAE
-//.. && epartIsReg(insn[2])=20
-//.. && (gregOfRM(insn[2]) =3D=3D 5 || gregOfRM(insn[2]) =3D=3D 6=
)) {
-//.. vassert(sz =3D=3D 4);
-//.. delta +=3D 3;
-//.. /* Insert a memory fence. It's sometimes important that thes=
e
-//.. are carried through to the generated code. */
-//.. stmt( IRStmt_MFence() );
-//.. DIP("%sfence\n", gregOfRM(insn[2])=3D=3D5 ? "l" : "m");
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 5F =3D MAXPD -- max 64Fx2 from R/M to R */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5F) {
-//.. delta =3D dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max=
64Fx2 );
-//.. goto decode_success;
-//.. }
+ /* 0F AE /5 =3D LFENCE -- flush pending operations to memory */
+ /* 0F AE /6 =3D MFENCE -- flush pending operations to memory */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xAE
+ && epartIsReg(insn[2])=20
+ && (gregLO3ofRM(insn[2]) =3D=3D 5 || gregLO3ofRM(insn[2]) =3D=3D =
6)) {
+ delta +=3D 3;
+ /* Insert a memory fence. It's sometimes important that these
+ are carried through to the generated code. */
+ stmt( IRStmt_MFence() );
+ DIP("%sfence\n", gregLO3ofRM(insn[2])=3D=3D5 ? "l" : "m");
+ goto decode_success;
+ }
=20
+ /* 66 0F 5F =3D MAXPD -- max 64Fx2 from R/M to R */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5F) {
+ delta =3D dis_SSE_E_to_G_all( pfx, delta+2, "maxpd", Iop_Max64Fx2 =
);
+ goto decode_success;
+ }
+
/* F2 0F 5F =3D MAXSD -- max 64F0x2 from R/M to R */
if (haveF2no66noF3(pfx) && sz =3D=3D 4
&& insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5F) {
@@ -9964,11 +9898,12 @@
goto decode_success;
}
=20
-//.. /* 66 0F 5D =3D MINPD -- min 64Fx2 from R/M to R */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5D) {
-//.. delta =3D dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min=
64Fx2 );
-//.. goto decode_success;
-//.. }
+ /* 66 0F 5D =3D MINPD -- min 64Fx2 from R/M to R */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x5D) {
+ delta =3D dis_SSE_E_to_G_all( pfx, delta+2, "minpd", Iop_Min64Fx2 =
);
+ goto decode_success;
+ }
=20
/* F2 0F 5D =3D MINSD -- min 64F0x2 from R/M to R */
if (haveF2no66noF3(pfx) && sz =3D=3D 4
@@ -10591,41 +10526,46 @@
goto decode_success;
}
=20
-//.. /* 66 0F FD =3D PADDW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xFD) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "paddw", Iop_Add16x8, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F EC =3D PADDSB */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEC) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "paddsb", Iop_QAdd8Sx16, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F ED =3D PADDSW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xED) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "paddsw", Iop_QAdd16Sx8, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F DC =3D PADDUSB */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDC) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "paddusb", Iop_QAdd8Ux16, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F DD =3D PADDUSW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDD) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "paddusw", Iop_QAdd16Ux8, False );
-//.. goto decode_success;
-//.. }
+ /* 66 0F FD =3D PADDW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xFD) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "paddw", Iop_Add16x8, False );
+ goto decode_success;
+ }
=20
+ /* 66 0F EC =3D PADDSB */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEC) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "paddsb", Iop_QAdd8Sx16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F ED =3D PADDSW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xED) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "paddsw", Iop_QAdd16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DC =3D PADDUSB */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDC) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "paddusb", Iop_QAdd8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DD =3D PADDUSW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDD) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "paddusw", Iop_QAdd16Ux8, False );
+ goto decode_success;
+ }
+
/* 66 0F DB =3D PAND */
if (have66noF2noF3(pfx) && sz =3D=3D 2=20
&& insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDB) {
@@ -10633,68 +10573,77 @@
goto decode_success;
}
=20
-//.. /* 66 0F DF =3D PANDN */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDF) {
-//.. delta =3D dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Io=
p_And128 );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F E0 =3D PAVGB */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE0) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pavgb", Iop_Avg8Ux16, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F E3 =3D PAVGW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE3) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pavgw", Iop_Avg16Ux8, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 74 =3D PCMPEQB */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x74) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pcmpeqb", Iop_CmpEQ8x16, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 76 =3D PCMPEQD */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x76) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pcmpeqd", Iop_CmpEQ32x4, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 75 =3D PCMPEQW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x75) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pcmpeqw", Iop_CmpEQ16x8, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 64 =3D PCMPGTB */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x64) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pcmpgtb", Iop_CmpGT8Sx16, False )=
;
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 66 =3D PCMPGTD */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x66) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pcmpgtd", Iop_CmpGT32Sx4, False )=
;
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 65 =3D PCMPGTW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x65) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pcmpgtw", Iop_CmpGT16Sx8, False )=
;
-//.. goto decode_success;
-//.. }
+ /* 66 0F DF =3D PANDN */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDF) {
+ delta =3D dis_SSE_E_to_G_all_invG( pfx, delta+2, "pandn", Iop_AndV=
128 );
+ goto decode_success;
+ }
=20
+ /* 66 0F E0 =3D PAVGB */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE0) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pavgb", Iop_Avg8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F E3 =3D PAVGW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE3) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pavgw", Iop_Avg16Ux8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 74 =3D PCMPEQB */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x74) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pcmpeqb", Iop_CmpEQ8x16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 76 =3D PCMPEQD */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x76) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pcmpeqd", Iop_CmpEQ32x4, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 75 =3D PCMPEQW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x75) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pcmpeqw", Iop_CmpEQ16x8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 64 =3D PCMPGTB */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x64) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pcmpgtb", Iop_CmpGT8Sx16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 66 =3D PCMPGTD */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x66) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pcmpgtd", Iop_CmpGT32Sx4, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 65 =3D PCMPGTW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x65) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pcmpgtw", Iop_CmpGT16Sx8, False );
+ goto decode_success;
+ }
+
/* 66 0F C5 =3D PEXTRW -- extract 16-bit field from xmm(E) and put=20
zero-extend of it in ireg(G). */
if (have66noF2noF3(pfx) && sz =3D=3D 2=20
Modified: trunk/priv/host-amd64/hdefs.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-amd64/hdefs.c 2005-05-11 00:03:06 UTC (rev 1179)
+++ trunk/priv/host-amd64/hdefs.c 2005-05-11 02:13:42 UTC (rev 1180)
@@ -607,10 +607,10 @@
case Asse_ADD16: return "paddw";
case Asse_ADD32: return "paddd";
case Asse_ADD64: return "paddq";
-//.. case Xsse_QADD8U: return "paddusb";
-//.. case Xsse_QADD16U: return "paddusw";
-//.. case Xsse_QADD8S: return "paddsb";
-//.. case Xsse_QADD16S: return "paddsw";
+ case Asse_QADD8U: return "paddusb";
+ case Asse_QADD16U: return "paddusw";
+ case Asse_QADD8S: return "paddsb";
+ case Asse_QADD16S: return "paddsw";
case Asse_SUB8: return "psubb";
case Asse_SUB16: return "psubw";
case Asse_SUB32: return "psubd";
@@ -622,18 +622,18 @@
case Asse_MUL16: return "pmullw";
case Asse_MULHI16U: return "pmulhuw";
case Asse_MULHI16S: return "pmulhw";
-//.. case Xsse_AVG8U: return "pavgb";
-//.. case Xsse_AVG16U: return "pavgw";
+ case Asse_AVG8U: return "pavgb";
+ case Asse_AVG16U: return "pavgw";
case Asse_MAX16S: return "pmaxw";
case Asse_MAX8U: return "pmaxub";
case Asse_MIN16S: return "pminw";
case Asse_MIN8U: return "pminub";
-//.. case Xsse_CMPEQ8: return "pcmpeqb";
-//.. case Xsse_CMPEQ16: return "pcmpeqw";
+ case Asse_CMPEQ8: return "pcmpeqb";
+ case Asse_CMPEQ16: return "pcmpeqw";
case Asse_CMPEQ32: return "pcmpeqd";
-//.. case Xsse_CMPGT8S: return "pcmpgtb";
-//.. case Xsse_CMPGT16S: return "pcmpgtw";
-//.. case Xsse_CMPGT32S: return "pcmpgtd";
+ case Asse_CMPGT8S: return "pcmpgtb";
+ case Asse_CMPGT16S: return "pcmpgtw";
+ case Asse_CMPGT32S: return "pcmpgtd";
case Asse_SHL16: return "psllw";
case Asse_SHL32: return "pslld";
case Asse_SHL64: return "psllq";
@@ -3222,9 +3222,9 @@
*p++ =3D 0x0F;
switch (i->Ain.Sse64Fx2.op) {
case Asse_ADDF: *p++ =3D 0x58; break;
-//.. case Xsse_DIVF: *p++ =3D 0x5E; break;
-//.. case Xsse_MAXF: *p++ =3D 0x5F; break;
-//.. case Xsse_MINF: *p++ =3D 0x5D; break;
+ case Asse_DIVF: *p++ =3D 0x5E; break;
+ case Asse_MAXF: *p++ =3D 0x5F; break;
+ case Asse_MINF: *p++ =3D 0x5D; break;
case Asse_MULF: *p++ =3D 0x59; break;
//.. case Xsse_RCPF: *p++ =3D 0x53; break;
//.. case Xsse_RSQRTF: *p++ =3D 0x52; break;
@@ -3314,21 +3314,21 @@
case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); brea=
k;
case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); brea=
k;
case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); brea=
k;
-//.. case Xsse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD);=
break;
+ case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); brea=
k;
case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); brea=
k;
case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); brea=
k;
-//.. case Xsse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC);=
break;
-//.. case Xsse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED);=
break;
-//.. case Xsse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC);=
break;
-//.. case Xsse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD);=
break;
-//.. case Xsse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0);=
break;
-//.. case Xsse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3);=
break;
-//.. case Xsse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74);=
break;
-//.. case Xsse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75);=
break;
+ case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); brea=
k;
+ case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); brea=
k;
+ case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); brea=
k;
+ case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); brea=
k;
+ case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); brea=
k;
+ case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); brea=
k;
+ case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); brea=
k;
+ case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); brea=
k;
case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); brea=
k;
-//.. case Xsse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64);=
break;
-//.. case Xsse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65);=
break;
-//.. case Xsse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66);=
break;
+ case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); brea=
k;
+ case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); brea=
k;
+ case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); brea=
k;
case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); brea=
k;
case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); brea=
k;
case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); brea=
k;
Modified: trunk/priv/host-amd64/hdefs.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-amd64/hdefs.h 2005-05-11 00:03:06 UTC (rev 1179)
+++ trunk/priv/host-amd64/hdefs.h 2005-05-11 02:13:42 UTC (rev 1180)
@@ -324,22 +324,21 @@
Asse_AND, Asse_OR, Asse_XOR, Asse_ANDN,
//.. /* Integer binary */
Asse_ADD8, Asse_ADD16, Asse_ADD32, Asse_ADD64,
-//.. Xsse_QADD8U, Xsse_QADD16U,
-//.. Xsse_QADD8S, Xsse_QADD16S,
+ Asse_QADD8U, Asse_QADD16U,
+ Asse_QADD8S, Asse_QADD16S,
Asse_SUB8, Asse_SUB16, Asse_SUB32, Asse_SUB64,
Asse_QSUB8U, Asse_QSUB16U,
Asse_QSUB8S, Asse_QSUB16S,
Asse_MUL16,
Asse_MULHI16U,
Asse_MULHI16S,
-//.. Xsse_AVG8U, Xsse_AVG16U,
+ Asse_AVG8U, Asse_AVG16U,
Asse_MAX16S,
Asse_MAX8U,
Asse_MIN16S,
Asse_MIN8U,
-//.. Xsse_CMPEQ8, Xsse_CMPEQ16, =20
- Asse_CMPEQ32,
-//.. Xsse_CMPGT8S, Xsse_CMPGT16S, Xsse_CMPGT32S,
+ Asse_CMPEQ8, Asse_CMPEQ16, Asse_CMPEQ32,
+ Asse_CMPGT8S, Asse_CMPGT16S, Asse_CMPGT32S,
Asse_SHL16, Asse_SHL32, Asse_SHL64,
Asse_SHR16, Asse_SHR32, Asse_SHR64,
Asse_SAR16, Asse_SAR32,=20
Modified: trunk/priv/host-amd64/isel.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-amd64/isel.c 2005-05-11 00:03:06 UTC (rev 1179)
+++ trunk/priv/host-amd64/isel.c 2005-05-11 02:13:42 UTC (rev 1180)
@@ -2634,20 +2634,17 @@
return res;
}
=20
-//.. if (e->tag =3D=3D Iex_Unop
-//.. && e->Iex.Unop.op =3D=3D Iop_ReinterpI32asF32) {
-//.. /* Given an I32, produce an IEEE754 float with the same bit
-//.. pattern. */
-//.. HReg dst =3D newVRegF(env);
-//.. X86RMI* rmi =3D iselIntExpr_RMI(env, e->Iex.Unop.arg);
-//.. /* paranoia */
-//.. addInstr(env, X86Instr_Push(rmi));
-//.. addInstr(env, X86Instr_FpLdSt(
-//.. True/*load*/, 4, dst,=20
-//.. X86AMode_IR(0, hregX86_ESP())));
-//.. add_to_esp(env, 4);
-//.. return dst;
-//.. }
+ if (e->tag =3D=3D Iex_Unop
+ && e->Iex.Unop.op =3D=3D Iop_ReinterpI32asF32) {
+ /* Given an I32, produce an IEEE754 float with the same bit
+ pattern. */
+ HReg dst =3D newVRegV(env);
+ HReg src =3D iselIntExpr_R(env, e->Iex.Unop.arg);
+ AMD64AMode* m4_rsp =3D AMD64AMode_IR(-4, hregAMD64_RSP());
+ addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
+ addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ))=
;
+ return dst;
+ }
=20
ppIRExpr(e);
vpanic("iselFltExpr_wrk");
@@ -3320,9 +3317,9 @@
case Iop_CmpLT64Fx2: op =3D Asse_CMPLTF; goto do_64Fx2;
case Iop_CmpLE64Fx2: op =3D Asse_CMPLEF; goto do_64Fx2;
case Iop_Add64Fx2: op =3D Asse_ADDF; goto do_64Fx2;
-//.. case Iop_Div64Fx2: op =3D Xsse_DIVF; goto do_64Fx2;
-//.. case Iop_Max64Fx2: op =3D Xsse_MAXF; goto do_64Fx2;
-//.. case Iop_Min64Fx2: op =3D Xsse_MINF; goto do_64Fx2;
+ case Iop_Div64Fx2: op =3D Asse_DIVF; goto do_64Fx2;
+ case Iop_Max64Fx2: op =3D Asse_MAXF; goto do_64Fx2;
+ case Iop_Min64Fx2: op =3D Asse_MINF; goto do_64Fx2;
case Iop_Mul64Fx2: op =3D Asse_MULF; goto do_64Fx2;
case Iop_Sub64Fx2: op =3D Asse_SUBF; goto do_64Fx2;
do_64Fx2:
@@ -3400,21 +3397,21 @@
case Iop_OrV128: op =3D Asse_OR; goto do_SseReRg;
case Iop_XorV128: op =3D Asse_XOR; goto do_SseReRg;
case Iop_Add8x16: op =3D Asse_ADD8; goto do_SseReRg;
-//.. case Iop_Add16x8: op =3D Xsse_ADD16; goto do_SseReRg;
+ case Iop_Add16x8: op =3D Asse_ADD16; goto do_SseReRg;
case Iop_Add32x4: op =3D Asse_ADD32; goto do_SseReRg;
case Iop_Add64x2: op =3D Asse_ADD64; goto do_SseReRg;
-//.. case Iop_QAdd8Sx16: op =3D Xsse_QADD8S; goto do_SseReRg;
-//.. case Iop_QAdd16Sx8: op =3D Xsse_QADD16S; goto do_SseReRg;
-//.. case Iop_QAdd8Ux16: op =3D Xsse_QADD8U; goto do_SseReRg;
-//.. case Iop_QAdd16Ux8: op =3D Xsse_QADD16U; goto do_SseReRg;
-//.. case Iop_Avg8Ux16: op =3D Xsse_AVG8U; goto do_SseReRg;
-//.. case Iop_Avg16Ux8: op =3D Xsse_AVG16U; goto do_SseReRg;
-//.. case Iop_CmpEQ8x16: op =3D Xsse_CMPEQ8; goto do_SseReRg;
-//.. case Iop_CmpEQ16x8: op =3D Xsse_CMPEQ16; goto do_SseReRg;
-//.. case Iop_CmpEQ32x4: op =3D Xsse_CMPEQ32; goto do_SseReRg;
-//.. case Iop_CmpGT8Sx16: op =3D Xsse_CMPGT8S; goto do_SseReRg;
-//.. case Iop_CmpGT16Sx8: op =3D Xsse_CMPGT16S; goto do_SseReRg;
-//.. case Iop_CmpGT32Sx4: op =3D Xsse_CMPGT32S; goto do_SseReRg;
+ case Iop_QAdd8Sx16: op =3D Asse_QADD8S; goto do_SseReRg;
+ case Iop_QAdd16Sx8: op =3D Asse_QADD16S; goto do_SseReRg;
+ case Iop_QAdd8Ux16: op =3D Asse_QADD8U; goto do_SseReRg;
+ case Iop_QAdd16Ux8: op =3D Asse_QADD16U; goto do_SseReRg;
+ case Iop_Avg8Ux16: op =3D Asse_AVG8U; goto do_SseReRg;
+ case Iop_Avg16Ux8: op =3D Asse_AVG16U; goto do_SseReRg;
+ case Iop_CmpEQ8x16: op =3D Asse_CMPEQ8; goto do_SseReRg;
+ case Iop_CmpEQ16x8: op =3D Asse_CMPEQ16; goto do_SseReRg;
+ case Iop_CmpEQ32x4: op =3D Asse_CMPEQ32; goto do_SseReRg;
+ case Iop_CmpGT8Sx16: op =3D Asse_CMPGT8S; goto do_SseReRg;
+ case Iop_CmpGT16Sx8: op =3D Asse_CMPGT16S; goto do_SseReRg;
+ case Iop_CmpGT32Sx4: op =3D Asse_CMPGT32S; goto do_SseReRg;
case Iop_Max16Sx8: op =3D Asse_MAX16S; goto do_SseReRg;
case Iop_Max8Ux16: op =3D Asse_MAX8U; goto do_SseReRg;
case Iop_Min16Sx8: op =3D Asse_MIN16S; goto do_SseReRg;
|
|
From: <sv...@va...> - 2005-05-11 02:12:36
|
Author: sewardj Date: 2005-05-11 03:12:31 +0100 (Wed, 11 May 2005) New Revision: 3656 Added: trunk/none/tests/amd64/insn_sse2.def Log: SSE2 tests for amd64. Added: trunk/none/tests/amd64/insn_sse2.def =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- trunk/none/tests/amd64/insn_sse2.def 2005-05-10 05:00:55 UTC (rev 365= 5) +++ trunk/none/tests/amd64/insn_sse2.def 2005-05-11 02:12:31 UTC (rev 365= 6) @@ -0,0 +1,330 @@ +addpd xmm.pd[1234.5678,8765.4321] xmm.pd[2222.2222,1111.1111] =3D> 1.pd[= 3456.79,9876.5432] +addpd m128.pd[1234.5678,8765.4321] xmm.pd[2222.2222,1111.1111] =3D> 1.pd= [3456.79,9876.5432] +addsd xmm.pd[1234.5678,8765.4321] xmm.pd[2222.2222,1111.1111] =3D> 1.pd[= 3456.79,1111.1111] +addsd m128.pd[1234.5678,8765.4321] xmm.pd[2222.2222,1111.1111] =3D> 1.pd= [3456.79,1111.1111] +andpd xmm.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789a= bcdef,0xfdb97531eca86420] =3D> 1.uq[0x0121452188a84420,0x0121452188a84420= ] +andpd m128.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789= abcdef,0xfdb97531eca86420] =3D> 1.uq[0x0121452188a84420,0x0121452188a8442= 0] +andnpd xmm.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789= abcdef,0xfdb97531eca86420] =3D> 1.uq[0xfc98301064002000,0x00020046010389c= f] +andnpd m128.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x012345678= 9abcdef,0xfdb97531eca86420] =3D> 1.uq[0xfc98301064002000,0x00020046010389= cf] +cmpeqpd xmm.pd[1234.5678,1234.5678] xmm.pd[1234.5678,1234.5679] =3D> 1.u= q[0xffffffffffffffff,0x0000000000000000] +cmpeqpd m128.pd[1234.5678,1234.5678] xmm.pd[1234.5678,1234.5679] =3D> 1.= uq[0xffffffffffffffff,0x0000000000000000] +cmpltpd xmm.pd[1234.5678,1234.5678] xmm.pd[1234.5677,1234.5679] =3D> 1.u= q[0xffffffffffffffff,0x0000000000000000] +cmpltpd m128.pd[1234.5678,1234.5678] xmm.pd[1234.5677,1234.5679] =3D> 1.= 
uq[0xffffffffffffffff,0x0000000000000000] +cmplepd xmm.pd[1234.5678,1234.5678] xmm.pd[1234.5678,1234.5679] =3D> 1.u= q[0xffffffffffffffff,0x0000000000000000] +cmplepd m128.pd[1234.5678,1234.5678] xmm.pd[1234.5678,1234.5679] =3D> 1.= uq[0xffffffffffffffff,0x0000000000000000] +cmpneqpd xmm.pd[1234.5678,1234.5678] xmm.pd[1234.5679,1234.5678] =3D> 1.= uq[0xffffffffffffffff,0x0000000000000000] +cmpneqpd m128.pd[1234.5678,1234.5678] xmm.pd[1234.5679,1234.5678] =3D> 1= .uq[0xffffffffffffffff,0x0000000000000000] +cmpnltpd xmm.pd[1234.5678,1234.5678] xmm.pd[1234.5679,1234.5677] =3D> 1.= uq[0xffffffffffffffff,0x0000000000000000] +cmpnltpd m128.pd[1234.5678,1234.5678] xmm.pd[1234.5679,1234.5677] =3D> 1= .uq[0xffffffffffffffff,0x0000000000000000] +cmpnlepd xmm.pd[1234.5678,1234.5678] xmm.pd[1234.5679,1234.5678] =3D> 1.= uq[0xffffffffffffffff,0x0000000000000000] +cmpnlepd m128.pd[1234.5678,1234.5678] xmm.pd[1234.5679,1234.5678] =3D> 1= .uq[0xffffffffffffffff,0x0000000000000000] +cmpeqsd xmm.pd[1234.5678,0.0] xmm.pd[1234.5678,0.0] =3D> 1.uq[0xffffffff= ffffffff,0] +cmpeqsd m128.pd[1234.5678,0.0] xmm.pd[1234.5679,0.0] =3D> 1.uq[0x0000000= 000000000,0] +cmpltsd xmm.pd[1234.5678,0.0] xmm.pd[1234.5677,0.0] =3D> 1.uq[0xffffffff= ffffffff,0] +cmpltsd m128.pd[1234.5678,0.0] xmm.pd[1234.5679,0.0] =3D> 1.uq[0x0000000= 000000000,0] +cmplesd xmm.pd[1234.5678,0.0] xmm.pd[1234.5678,0.0] =3D> 1.uq[0xffffffff= ffffffff,0] +cmplesd m128.pd[1234.5678,0.0] xmm.pd[1234.5679,0.0] =3D> 1.uq[0x0000000= 000000000,0] +cmpneqsd xmm.pd[1234.5678,0.0] xmm.pd[1234.5679,0.0] =3D> 1.uq[0xfffffff= fffffffff,0] +cmpneqsd m128.pd[1234.5678,0.0] xmm.pd[1234.5678,0.0] =3D> 1.uq[0x000000= 0000000000,0] +cmpnltsd xmm.pd[1234.5678,0.0] xmm.pd[1234.5679,0.0] =3D> 1.uq[0xfffffff= fffffffff,0] +cmpnltsd m128.pd[1234.5678,0.0] xmm.pd[1234.5677,0.0] =3D> 1.uq[0x000000= 0000000000,0] +cmpnlesd xmm.pd[1234.5678,0.0] xmm.pd[1234.5679,0.0] =3D> 1.uq[0xfffffff= fffffffff,0] +cmpnlesd m128.pd[1234.5678,0.0] 
xmm.pd[1234.5678,0.0] =3D> 1.uq[0x000000= 0000000000,0] +comisd xmm.pd[1234.5678,0.0] xmm.pd[1234.5679,0.0] =3D> eflags[0x8d5,0x0= 00] +comisd xmm.pd[1234.5678,0.0] xmm.pd[1234.5677,0.0] =3D> eflags[0x8d5,0x0= 01] +comisd xmm.pd[1234.5678,0.0] xmm.pd[1234.5678,0.0] =3D> eflags[0x8d5,0x0= 40] +comisd m64.pd[1234.5678] xmm.pd[1234.5679,0.0] =3D> eflags[0x8d5,0x000] +comisd m64.pd[1234.5678] xmm.pd[1234.5677,0.0] =3D> eflags[0x8d5,0x001] +comisd m64.pd[1234.5678] xmm.pd[1234.5678,0.0] =3D> eflags[0x8d5,0x040] +cvtdq2pd xmm.sd[1234,5678,0,0] xmm.pd[0.0,0.0] =3D> 1.pd[1234.0,5678.0] +cvtdq2pd m128.sd[1234,5678,0,0] xmm.pd[0.0,0.0] =3D> 1.pd[1234.0,5678.0] +cvtdq2ps xmm.sd[1234,5678,-1234,-5678] xmm.ps[0.0,0.0,0.0,0.0] =3D> 1.ps= [1234.0,5678.0,-1234.0,-5678.0] +cvtdq2ps m128.sd[1234,5678,-1234,-5678] xmm.ps[0.0,0.0,0.0,0.0] =3D> 1.p= s[1234.0,5678.0,-1234.0,-5678.0] +cvtpd2dq xmm.pd[12.34,56.78] xmm.sd[1,2,3,4] =3D> 1.sd[12,57,0,0] +cvtpd2dq m128.pd[12.34,56.78] xmm.sd[1,2,3,4] =3D> 1.sd[12,57,0,0] +cvtpd2pi xmm.pd[12.34,56.78] mm.sd[1,2] =3D> 1.sd[12,57] +cvtpd2pi m128.pd[12.34,56.78] mm.sd[1,2] =3D> 1.sd[12,57] +cvtpd2ps xmm.pd[12.34,56.78] xmm.ps[1.1,2.2,3.3,4.4] =3D> 1.ps[12.34,56.= 78,0.0,0.0] +cvtpd2ps m128.pd[12.34,56.78] xmm.ps[1.1,2.2,3.3,4.4] =3D> 1.ps[12.34,56= .78,0.0,0.0] +cvtpi2pd mm.sd[1234,5678] xmm.pd[1.1,2.2] =3D> 1.pd[1234.0,5678.0] +cvtpi2pd m64.sd[1234,5678] xmm.pd[1.1,2.2] =3D> 1.pd[1234.0,5678.0] +cvtps2dq xmm.ps[12.34,56.78,43.21,87.65] xmm.sd[1,2,3,4] =3D> 1.sd[12,57= ,43,88] +cvtps2dq m128.ps[12.34,56.78,43.21,87.65] xmm.sd[1,2,3,4] =3D> 1.sd[12,5= 7,43,88] +cvtps2pd xmm.ps[12.34,56.78,1.1,2.2] xmm.pd[3.3,4.4] =3D> 1.pd[12.34,56.= 78] +cvtps2pd m128.ps[12.34,56.78,1.1,2.2] xmm.pd[3.3,4.4] =3D> 1.pd[12.34,56= .78] +cvtsd2si xmm.pd[12.34,56.78] r32.sd[99] =3D> 1.sd[12] +cvtsd2si m128.pd[56.78,12.34] r32.sd[99] =3D> 1.sd[57] +cvtsd2ss xmm.pd[12.34,56.78] xmm.ps[1.11,2.22,3.33,4.44] =3D> 1.ps[12.34= ,2.22,3.33,4.44] +cvtsd2ss m128.pd[12.34,56.78] 
xmm.ps[1.11,2.22,3.33,4.44] =3D> 1.ps[12.3= 4,2.22,3.33,4.44] +cvtsi2sd r32.sd[12] xmm.pd[1.11,2.22] =3D> 1.pd[12.0,2.22] +cvtsi2sd m32.sd[12] xmm.pd[1.11,2.22] =3D> 1.pd[12.0,2.22] +cvtss2sd xmm.ps[12.34,3.33,4.44,5.55] xmm.pd[1.11,2.22] =3D> 1.pd[12.34,= 2.22] +cvtss2sd m128.ps[12.34,3.33,4.44,5.55] xmm.pd[1.11,2.22] =3D> 1.pd[12.34= ,2.22] +cvttpd2pi xmm.pd[12.34,56.78] mm.sd[1,2] =3D> 1.sd[12,56] +cvttpd2pi m128.pd[12.34,56.78] mm.sd[1,2] =3D> 1.sd[12,56] +cvttpd2dq xmm.pd[12.34,56.78] xmm.sd[1,2,3,4] =3D> 1.sd[12,56,0,0] +cvttpd2dq m128.pd[12.34,56.78] xmm.sd[1,2,3,4] =3D> 1.sd[12,56,0,0] +cvttps2dq xmm.ps[12.34,56.78,43.21,87.65] xmm.sd[1,2,3,4] =3D> 1.sd[12,5= 6,43,87] +cvttps2dq m128.ps[12.34,56.78,43.21,87.65] xmm.sd[1,2,3,4] =3D> 1.sd[12,= 56,43,87] +cvttsd2si xmm.pd[12.34,56.78] r32.sd[99] =3D> 1.sd[12] +cvttsd2si m128.pd[56.78,12.34] r32.sd[99] =3D> 1.sd[56] +divpd xmm.pd[2.0,3.0] xmm.pd[24.68,3.69] =3D> 1.pd[12.34,1.23] +divpd m128.pd[2.0,3.0] xmm.pd[24.68,3.69] =3D> 1.pd[12.34,1.23] +divsd xmm.pd[2.0,3.0] xmm.pd[24.68,3.69] =3D> 1.pd[12.34,3.69] +divsd m128.pd[2.0,3.0] xmm.pd[24.68,3.69] =3D> 1.pd[12.34,3.69] +lfence +maxpd xmm.pd[22.222,44.444] xmm.pd[55.555,33.333] =3D> 1.pd[55.555,44.44= 4] +maxpd m128.pd[22.222,44.444] xmm.pd[55.555,33.333] =3D> 1.pd[55.555,44.4= 44] +maxsd xmm.pd[22.222,44.444] xmm.pd[55.555,33.333] =3D> 1.pd[55.555,33.33= 3] +maxsd m128.pd[44.444,22.222] xmm.pd[33.333,55.555] =3D> 1.pd[44.444,55.5= 55] +mfence +minpd xmm.pd[22.222,44.444] xmm.pd[55.555,33.333] =3D> 1.pd[22.222,33.33= 3] +minpd m128.pd[22.222,44.444] xmm.pd[55.555,33.333] =3D> 1.pd[22.222,33.3= 33] +minsd xmm.pd[22.222,44.444] xmm.pd[55.555,33.333] =3D> 1.pd[22.222,33.33= 3] +minsd m128.pd[44.444,22.222] xmm.pd[33.333,55.555] =3D> 1.pd[33.333,55.5= 55] +movapd xmm.pd[1234.5678,8765.4321] xmm.pd[1111.1111,2222.2222] =3D> 1.pd= [1234.5678,8765.4321] +movapd m128.pd[1234.5678,8765.4321] xmm.pd[1111.1111,2222.2222] =3D> 1.p= d[1234.5678,8765.4321] +movd r32.sd[1234] 
xmm.sd[1111,2222,3333,4444] =3D> 1.sd[1234,0,0,0] +movd m32.sd[1234] xmm.sd[1111,2222,3333,4444] =3D> 1.sd[1234,0,0,0] +movd xmm.sd[1234,2222,3333,4444] r32.sd[1111] =3D> 1.sd[1234] +movd xmm.sd[1234,2222,3333,4444] m32.sd[1111] =3D> 1.sd[1234] +movdqa xmm.uq[0x012345678abcdef,0xfedcba9876543210] xmm.uq[0x12121212343= 43434,0x5656565678787878] =3D> 1.uq[0x012345678abcdef,0xfedcba9876543210] +movdqa m128.uq[0x012345678abcdef,0xfedcba9876543210] xmm.uq[0x1212121234= 343434,0x5656565678787878] =3D> 1.uq[0x012345678abcdef,0xfedcba9876543210= ] +movdqa xmm.uq[0x012345678abcdef,0xfedcba9876543210] m128.uq[0x1212121234= 343434,0x5656565678787878] =3D> 1.uq[0x012345678abcdef,0xfedcba9876543210= ] +movdqu xmm.uq[0x012345678abcdef,0xfedcba9876543210] xmm.uq[0x12121212343= 43434,0x5656565678787878] =3D> 1.uq[0x012345678abcdef,0xfedcba9876543210] +movdqu m128.uq[0x012345678abcdef,0xfedcba9876543210] xmm.uq[0x1212121234= 343434,0x5656565678787878] =3D> 1.uq[0x012345678abcdef,0xfedcba9876543210= ] +movdqu xmm.uq[0x012345678abcdef,0xfedcba9876543210] m128.uq[0x1212121234= 343434,0x5656565678787878] =3D> 1.uq[0x012345678abcdef,0xfedcba9876543210= ] +movdq2q xmm.uq[0x012345678abcdef,0xfedcba9876543210] mm.uq[0x12121212343= 43434] =3D> 1.uq[0x012345678abcdef] +movhpd m64.pd[1234.5678] xmm.pd[1111.1111,2222.2222] =3D> 1.pd[1111.1111= ,1234.5678] +movhpd xmm.pd[1234.5678,8765.4321] m64.pd[1111.1111] =3D> 1.pd[8765.4321= ] +movlpd m64.pd[1234.5678] xmm.pd[1111.1111,2222.2222] =3D> 1.pd[1234.5678= ,2222.2222] +movlpd xmm.pd[1234.5678,8765.4321] m64.pd[1111.1111] =3D> 1.pd[1234.5678= ] +movmskpd xmm.pd[1234.5678,-1234.5678] r32.sd[0] =3D> 1.sd[2] +movntdq xmm.uq[0x012345678abcdef,0xfedcba9876543210] m128.uq[0x121212123= 4343434,0x5656565678787878] =3D> 1.uq[0x012345678abcdef,0xfedcba987654321= 0] +movnti r32.sd[12345678] m32.sd[11111111] =3D> 1.sd[12345678] +movntpd xmm.pd[1234.5678,8765.4321] m128.pd[1111.1111,2222.2222] =3D> 1.= pd[1234.5678,8765.4321] +movq2dq mm.uq[0x012345678abcdef] 
xmm.uq[0x1212121234343434,0x56565656787= 87878] =3D> 1.uq[0x012345678abcdef,0] +movsd xmm.pd[1234.5678,8765.4321] xmm.pd[1111.1111,2222.2222] =3D> 1.pd[= 1234.5678,2222.2222] +movsd m64.pd[1234.5678] xmm.pd[1111.1111,2222.2222] =3D> 1.pd[1234.5678,= 0.0] +movsd xmm.pd[1234.5678,8765.4321] m64.pd[1111.1111] =3D> 1.pd[1234.5678] +movupd xmm.pd[1234.5678,8765.4321] xmm.pd[1111.1111,2222.2222] =3D> 1.pd= [1234.5678,8765.4321] +movupd m128.pd[1234.5678,8765.4321] xmm.pd[1111.1111,2222.2222] =3D> 1.p= d[1234.5678,8765.4321] +mulpd xmm.pd[1234.5678,8765.4321] xmm.pd[3.0,2.0] =3D> 1.pd[3703.7034,17= 530.8642] +mulpd m128.pd[1234.5678,8765.4321] xmm.pd[3.0,2.0] =3D> 1.pd[3703.7034,1= 7530.8642] +mulsd xmm.pd[1234.5678,8765.4321] xmm.pd[3.0,2.0] =3D> 1.pd[3703.7034,2.= 0] +mulsd m128.pd[1234.5678,8765.4321] xmm.pd[3.0,2.0] =3D> 1.pd[3703.7034,2= .0] +orpd xmm.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789ab= cdef,0xfdb97531eca86420] =3D> 1.uq[0xfdbb7577edabedef,0xfdbb7577edabedef] +orpd m128.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789a= bcdef,0xfdb97531eca86420] =3D> 1.uq[0xfdbb7577edabedef,0xfdbb7577edabedef= ] +packssdw xmm.sd[12345,-12345,123456,-123456] xmm.sd[4321,-4321,54321,-54= 321] =3D> 1.sw[4321,-4321,32767,-32768,12345,-12345,32767,-32768] +packssdw m128.sd[12345,-12345,123456,-123456] xmm.sd[4321,-4321,54321,-5= 4321] =3D> 1.sw[4321,-4321,32767,-32768,12345,-12345,32767,-32768] +packsswb xmm.sw[123,-123,1234,-1234,123,-123,1234,-1234] xmm.sw[21,-21,3= 21,-321,21,-21,321,-321] =3D> 1.sb[21,-21,127,-128,21,-21,127,-128,123,-1= 23,127,-128,123,-123,127,-128] +packsswb m128.sw[123,-123,1234,-1234,123,-123,1234,-1234] xmm.sw[21,-21,= 321,-321,21,-21,321,-321] =3D> 1.sb[21,-21,127,-128,21,-21,127,-128,123,-= 123,127,-128,123,-123,127,-128] +packuswb xmm.sw[123,-123,1234,-1234,123,-123,1234,-1234] xmm.sw[21,-21,3= 21,-321,21,-21,321,-321] =3D> 1.ub[21,0,255,0,21,0,255,0,123,0,255,0,123,= 0,255,0] +packuswb 
m128.sw[123,-123,1234,-1234,123,-123,1234,-1234] xmm.sw[21,-21,= 321,-321,21,-21,321,-321] =3D> 1.ub[21,0,255,0,21,0,255,0,123,0,255,0,123= ,0,255,0] +paddb xmm.sb[12,34,56,78,21,43,65,87,12,34,56,78,21,43,65,87] xmm.sb[8,7= ,6,5,4,3,2,1,8,7,6,5,4,3,2,1] =3D> 1.sb[20,41,62,83,25,46,67,88,20,41,62,= 83,25,46,67,88] +paddb m128.sb[12,34,56,78,21,43,65,87,12,34,56,78,21,43,65,87] xmm.sb[8,= 7,6,5,4,3,2,1,8,7,6,5,4,3,2,1] =3D> 1.sb[20,41,62,83,25,46,67,88,20,41,62= ,83,25,46,67,88] +paddd xmm.sd[12345678,87654321,12345678,87654321] xmm.sd[8765,4321,8765,= 4321] =3D> 1.sd[12354443,87658642,12354443,87658642] +paddd m128.sd[12345678,87654321,12345678,87654321] xmm.sd[8765,4321,8765= ,4321] =3D> 1.sd[12354443,87658642,12354443,87658642] +paddq mm.sq[11111111] mm.sq[22222222] =3D> 1.sq[33333333] +paddq m64.sq[11111111] mm.sq[22222222] =3D> 1.sq[33333333] +paddq xmm.sq[11111111,22222222] xmm.sq[22222222,33333333] =3D> 1.sq[3333= 3333,55555555] +paddq m128.sq[11111111,22222222] xmm.sq[22222222,33333333] =3D> 1.sq[333= 33333,55555555] +paddsb xmm.sb[25,-25,50,-50,100,-100,125,-125,25,-25,50,-50,100,-100,125= ,-125] xmm.sb[40,-40,30,-30,20,-20,10,-10,40,-40,30,-30,20,-20,10,-10] =3D= > 1.sb[65,-65,80,-80,120,-120,127,-128,65,-65,80,-80,120,-120,127,-128] +paddsb m128.sb[25,-25,50,-50,100,-100,125,-125,25,-25,50,-50,100,-100,12= 5,-125] xmm.sb[40,-40,30,-30,20,-20,10,-10,40,-40,30,-30,20,-20,10,-10] =3D= > 1.sb[65,-65,80,-80,120,-120,127,-128,65,-65,80,-80,120,-120,127,-128] +paddsw xmm.sw[12345,-12345,32145,-32145,12345,-12345,32145,-32145] xmm.s= w[32145,-32145,-12345,12345,32145,-32145,-12345,12345] =3D> 1.sw[32767,-3= 2768,19800,-19800,32767,-32768,19800,-19800] +paddsw m128.sw[12345,-12345,32145,-32145,12345,-12345,32145,-32145] xmm.= sw[32145,-32145,-12345,12345,32145,-32145,-12345,12345] =3D> 1.sw[32767,-= 32768,19800,-19800,32767,-32768,19800,-19800] +paddusb xmm.ub[25,50,75,100,125,150,175,200,25,50,75,100,125,150,175,200= ] 
xmm.ub[10,20,30,40,50,60,70,80,10,20,30,40,50,60,70,80] =3D> 1.ub[35,70= ,105,140,175,210,245,255,35,70,105,140,175,210,245,255] +paddusb m128.ub[25,50,75,100,125,150,175,200,25,50,75,100,125,150,175,20= 0] xmm.ub[10,20,30,40,50,60,70,80,10,20,30,40,50,60,70,80] =3D> 1.ub[35,7= 0,105,140,175,210,245,255,35,70,105,140,175,210,245,255] +paddusw xmm.uw[22222,33333,44444,55555,22222,33333,44444,55555] xmm.uw[6= 666,7777,8888,9999,6666,7777,8888,9999] =3D> 1.uw[28888,41110,53332,65535= ,28888,41110,53332,65535] +paddusw m128.uw[22222,33333,44444,55555,22222,33333,44444,55555] xmm.uw[= 6666,7777,8888,9999,6666,7777,8888,9999] =3D> 1.uw[28888,41110,53332,6553= 5,28888,41110,53332,65535] +paddw xmm.sw[1234,5678,4321,8765,1234,5678,4321,8765] xmm.sw[87,65,43,21= ,87,65,43,21] =3D> 1.sw[1321,5743,4364,8786,1321,5743,4364,8786] +paddw m128.sw[1234,5678,4321,8765,1234,5678,4321,8765] xmm.sw[87,65,43,2= 1,87,65,43,21] =3D> 1.sw[1321,5743,4364,8786,1321,5743,4364,8786] +pand xmm.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789ab= cdef,0xfdb97531eca86420] =3D> 1.uq[0x0121452188a84420,0x0121452188a84420] +pand m128.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789a= bcdef,0xfdb97531eca86420] =3D> 1.uq[0x0121452188a84420,0x0121452188a84420= ] +pandn xmm.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789a= bcdef,0xfdb97531eca86420] =3D> 1.uq[0xfc98301064002000,0x00020046010389cf= ] +pandn m128.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789= abcdef,0xfdb97531eca86420] =3D> 1.uq[0xfc98301064002000,0x00020046010389c= f] +pavgb xmm.ub[11,22,33,44,55,66,77,88,11,22,33,44,55,66,77,88] xmm.ub[15,= 25,35,45,55,65,75,85,15,25,35,45,55,65,75,85] =3D> 1.ub[13,24,34,45,55,66= ,76,87,13,24,34,45,55,66,76,87] +pavgb m128.ub[11,22,33,44,55,66,77,88,11,22,33,44,55,66,77,88] xmm.ub[15= ,25,35,45,55,65,75,85,15,25,35,45,55,65,75,85] =3D> 1.ub[13,24,34,45,55,6= 6,76,87,13,24,34,45,55,66,76,87] +pavgw xmm.uw[1122,3344,5566,7788,1122,3344,5566,7788] 
xmm.uw[1525,3545,5= 565,7585,1525,3545,5565,7585] =3D> 1.uw[1324,3445,5566,7687,1324,3445,556= 6,7687] +pavgw m128.uw[1122,3344,5566,7788,1122,3344,5566,7788] xmm.uw[1525,3545,= 5565,7585,1525,3545,5565,7585] =3D> 1.uw[1324,3445,5566,7687,1324,3445,55= 66,7687] +pcmpeqb xmm.ub[11,22,33,44,55,66,77,88,11,22,33,44,55,66,77,88] xmm.ub[1= 1,11,33,33,55,55,77,77,11,11,33,33,55,55,77,77] =3D> 1.ub[0xff,0x00,0xff,= 0x00,0xff,0x00,0xff,0x00,0xff,0x00,0xff,0x00,0xff,0x00,0xff,0x00] +pcmpeqb m128.ub[11,22,33,44,55,66,77,88,11,22,33,44,55,66,77,88] xmm.ub[= 11,11,33,33,55,55,77,77,11,11,33,33,55,55,77,77] =3D> 1.ub[0xff,0x00,0xff= ,0x00,0xff,0x00,0xff,0x00,0xff,0x00,0xff,0x00,0xff,0x00,0xff,0x00] +pcmpeqd xmm.ud[11223344,55667788,11223344,55667788] xmm.ud[11223344,1122= 3344,11223344,11223344] =3D> 1.ud[0xffffffff,0x00000000,0xffffffff,0x0000= 0000] +pcmpeqd m128.ud[11223344,55667788,11223344,55667788] xmm.ud[11223344,112= 23344,11223344,11223344] =3D> 1.ud[0xffffffff,0x00000000,0xffffffff,0x000= 00000] +pcmpeqw xmm.uw[1122,3344,5566,7788,1122,3344,5566,7788] xmm.uw[1122,1122= ,5566,5566,1122,1122,5566,5566] =3D> 1.uw[0xffff,0x0000,0xffff,0x0000,0xf= fff,0x0000,0xffff,0x0000] +pcmpeqw m128.uw[1122,3344,5566,7788,1122,3344,5566,7788] xmm.uw[1122,112= 2,5566,5566,1122,1122,5566,5566] =3D> 1.uw[0xffff,0x0000,0xffff,0x0000,0x= ffff,0x0000,0xffff,0x0000] +pcmpgtb xmm.sb[-77,-55,-33,-11,11,33,55,77,-77,-55,-33,-11,11,33,55,77] = xmm.sb[77,55,33,11,-11,-33,-55,-77,77,55,33,11,-11,-33,-55,-77] =3D> 1.ub= [0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0x00,0x00,0x= 00,0x00] +pcmpgtb m128.sb[-77,-55,-33,-11,11,33,55,77,-77,-55,-33,-11,11,33,55,77]= xmm.sb[77,55,33,11,-11,-33,-55,-77,77,55,33,11,-11,-33,-55,-77] =3D> 1.u= b[0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0x00,0x00,0= x00,0x00] +pcmpgtd xmm.sd[-11111111,11111111,-11111111,11111111] xmm.sd[11111111,-1= 1111111,11111111,-11111111] =3D> 1.ud[0xffffffff,0x00000000,0xffffffff,0x= 00000000] +pcmpgtd 
m128.sd[-11111111,11111111,-11111111,11111111] xmm.sd[11111111,-= 11111111,11111111,-11111111] =3D> 1.ud[0xffffffff,0x00000000,0xffffffff,0= x00000000] +pcmpgtw xmm.sw[-3333,-1111,1111,3333,-3333,-1111,1111,3333] xmm.sw[3333,= 1111,-1111,-3333,3333,1111,-1111,-3333] =3D> 1.uw[0xffff,0xffff,0x0000,0x= 0000,0xffff,0xffff,0x0000,0x0000] +pcmpgtw m128.sw[-3333,-1111,1111,3333,-3333,-1111,1111,3333] xmm.sw[3333= ,1111,-1111,-3333,3333,1111,-1111,-3333] =3D> 1.uw[0xffff,0xffff,0x0000,0= x0000,0xffff,0xffff,0x0000,0x0000] +pextrw imm8[0] xmm.uw[1234,5678,4321,8765,1111,2222,3333,4444] r32.ud[0x= ffffffff] =3D> 2.ud[1234] +pextrw imm8[1] xmm.uw[1234,5678,4321,8765,1111,2222,3333,4444] r32.ud[0x= ffffffff] =3D> 2.ud[5678] +pextrw imm8[2] xmm.uw[1234,5678,4321,8765,1111,2222,3333,4444] r32.ud[0x= ffffffff] =3D> 2.ud[4321] +pextrw imm8[3] xmm.uw[1234,5678,4321,8765,1111,2222,3333,4444] r32.ud[0x= ffffffff] =3D> 2.ud[8765] +pextrw imm8[4] xmm.uw[1234,5678,4321,8765,1111,2222,3333,4444] r32.ud[0x= ffffffff] =3D> 2.ud[1111] +pextrw imm8[5] xmm.uw[1234,5678,4321,8765,1111,2222,3333,4444] r32.ud[0x= ffffffff] =3D> 2.ud[2222] +pextrw imm8[6] xmm.uw[1234,5678,4321,8765,1111,2222,3333,4444] r32.ud[0x= ffffffff] =3D> 2.ud[3333] +pextrw imm8[7] xmm.uw[1234,5678,4321,8765,1111,2222,3333,4444] r32.ud[0x= ffffffff] =3D> 2.ud[4444] +pinsrw imm8[0] r32.ud[0xffffffff] xmm.uw[1234,5678,4321,8765,1111,2222,3= 333,4444] =3D> 2.uw[65535,5678,4321,8765,1111,2222,3333,4444] +pinsrw imm8[1] r32.ud[0xffffffff] xmm.uw[1234,5678,4321,8765,1111,2222,3= 333,4444] =3D> 2.uw[1234,65535,4321,8765,1111,2222,3333,4444] +pinsrw imm8[2] r32.ud[0xffffffff] xmm.uw[1234,5678,4321,8765,1111,2222,3= 333,4444] =3D> 2.uw[1234,5678,65535,8765,1111,2222,3333,4444] +pinsrw imm8[3] r32.ud[0xffffffff] xmm.uw[1234,5678,4321,8765,1111,2222,3= 333,4444] =3D> 2.uw[1234,5678,4321,65535,1111,2222,3333,4444] +pinsrw imm8[4] r32.ud[0xffffffff] xmm.uw[1234,5678,4321,8765,1111,2222,3= 333,4444] =3D> 
2.uw[1234,5678,4321,8765,65535,2222,3333,4444] +pinsrw imm8[5] r32.ud[0xffffffff] xmm.uw[1234,5678,4321,8765,1111,2222,3= 333,4444] =3D> 2.uw[1234,5678,4321,8765,1111,65535,3333,4444] +pinsrw imm8[6] r32.ud[0xffffffff] xmm.uw[1234,5678,4321,8765,1111,2222,3= 333,4444] =3D> 2.uw[1234,5678,4321,8765,1111,2222,65535,4444] +pinsrw imm8[7] r32.ud[0xffffffff] xmm.uw[1234,5678,4321,8765,1111,2222,3= 333,4444] =3D> 2.uw[1234,5678,4321,8765,1111,2222,3333,65535] +pinsrw imm8[0] m16.uw[0xffff] xmm.uw[1234,5678,4321,8765,1111,2222,3333,= 4444] =3D> 2.uw[65535,5678,4321,8765,1111,2222,3333,4444] +pinsrw imm8[1] m16.uw[0xffff] xmm.uw[1234,5678,4321,8765,1111,2222,3333,= 4444] =3D> 2.uw[1234,65535,4321,8765,1111,2222,3333,4444] +pinsrw imm8[2] m16.uw[0xffff] xmm.uw[1234,5678,4321,8765,1111,2222,3333,= 4444] =3D> 2.uw[1234,5678,65535,8765,1111,2222,3333,4444] +pinsrw imm8[3] m16.uw[0xffff] xmm.uw[1234,5678,4321,8765,1111,2222,3333,= 4444] =3D> 2.uw[1234,5678,4321,65535,1111,2222,3333,4444] +pinsrw imm8[4] m16.uw[0xffff] xmm.uw[1234,5678,4321,8765,1111,2222,3333,= 4444] =3D> 2.uw[1234,5678,4321,8765,65535,2222,3333,4444] +pinsrw imm8[5] m16.uw[0xffff] xmm.uw[1234,5678,4321,8765,1111,2222,3333,= 4444] =3D> 2.uw[1234,5678,4321,8765,1111,65535,3333,4444] +pinsrw imm8[6] m16.uw[0xffff] xmm.uw[1234,5678,4321,8765,1111,2222,3333,= 4444] =3D> 2.uw[1234,5678,4321,8765,1111,2222,65535,4444] +pinsrw imm8[7] m16.uw[0xffff] xmm.uw[1234,5678,4321,8765,1111,2222,3333,= 4444] =3D> 2.uw[1234,5678,4321,8765,1111,2222,3333,65535] +pmaxsw xmm.sw[-1,2,-3,4,-5,6,-7,8] xmm.sw[2,-3,4,-5,6,-7,8,-9] =3D> 1.sw= [2,2,4,4,6,6,8,8] +pmaxsw m128.sw[-1,2,-3,4,-5,6,-7,8] xmm.sw[2,-3,4,-5,6,-7,8,-9] =3D> 1.s= w[2,2,4,4,6,6,8,8] +pmaxub xmm.ub[10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] xmm.ub[25= ,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10] =3D> 1.ub[25,24,23,22,21,2= 0,19,18,18,19,20,21,22,23,24,25] +pmaxub m128.ub[10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] xmm.ub[2= 
5,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10] =3D> 1.ub[25,24,23,22,21,= 20,19,18,18,19,20,21,22,23,24,25] +pminsw xmm.sw[-1,2,-3,4,-5,6,-7,8] xmm.sw[2,-3,4,-5,6,-7,8,-9] =3D> 1.sw= [-1,-3,-3,-5,-5,-7,-7,-9] +pminsw m128.sw[-1,2,-3,4,-5,6,-7,8] xmm.sw[2,-3,4,-5,6,-7,8,-9] =3D> 1.s= w[-1,-3,-3,-5,-5,-7,-7,-9] +pminub xmm.ub[10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] xmm.ub[25= ,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10] =3D> 1.ub[10,11,12,13,14,1= 5,16,17,17,16,15,14,13,12,11,10] +pminub m128.ub[10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] xmm.ub[2= 5,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10] =3D> 1.ub[10,11,12,13,14,= 15,16,17,17,16,15,14,13,12,11,10] +pmovmskb xmm.uq[0x8000000080008088,0x8000000080008088] r32.ud[0] =3D> 1.= ud[0x8b8b] +pmulhuw xmm.uw[1111,2222,3333,4444,5555,6666,7777,8888] xmm.uw[5555,6666= ,7777,8888,9999,1111,2222,3333] =3D> 1.uw[0x005e,0x00e2,0x018b,0x025a,0x0= 34f,0x0071,0x0107,0x01c4] +pmulhuw m128.uw[1111,2222,3333,4444,5555,6666,7777,8888] xmm.uw[5555,666= 6,7777,8888,9999,1111,2222,3333] =3D> 1.uw[0x005e,0x00e2,0x018b,0x025a,0x= 034f,0x0071,0x0107,0x01c4] +pmulhw xmm.sw[1111,2222,-1111,-2222,1111,2222,-1111,-2222] xmm.sw[3333,-= 4444,3333,-4444,3333,-4444,3333,-4444] =3D> 1.uw[0x0038,0xff69,0xffc7,0x0= 096,0x0038,0xff69,0xffc7,0x0096] +pmulhw m128.sw[1111,2222,-1111,-2222,1111,2222,-1111,-2222] xmm.sw[3333,= -4444,3333,-4444,3333,-4444,3333,-4444] =3D> 1.uw[0x0038,0xff69,0xffc7,0x= 0096,0x0038,0xff69,0xffc7,0x0096] +pmullw xmm.sw[1111,2222,-1111,-2222,1111,2222,-1111,-2222] xmm.sw[3333,-= 4444,3333,-4444,3333,-4444,3333,-4444] =3D> 1.uw[0x80b3,0x5378,0x7f4d,0xa= c88,0x80b3,0x5378,0x7f4d,0xac88] +pmullw m128.sw[1111,2222,-1111,-2222,1111,2222,-1111,-2222] xmm.sw[3333,= -4444,3333,-4444,3333,-4444,3333,-4444] =3D> 1.uw[0x80b3,0x5378,0x7f4d,0x= ac88,0x80b3,0x5378,0x7f4d,0xac88] +pmuludq mm.ud[12345678,0] mm.ud[87654321,0] =3D> 1.uq[1082152022374638] +pmuludq m64.ud[12345678,0] mm.ud[87654321,0] =3D> 1.uq[1082152022374638] 
+pmuludq xmm.ud[12345678,0,87654321,0] xmm.ud[87654321,0,12345678,0] =3D>= 1.uq[1082152022374638,1082152022374638] +pmuludq m128.ud[12345678,0,87654321,0] xmm.ud[87654321,0,12345678,0] =3D= > 1.uq[1082152022374638,1082152022374638] +por xmm.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789abc= def,0xfdb97531eca86420] =3D> 1.uq[0xfdbb7577edabedef,0xfdbb7577edabedef] +por m128.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789ab= cdef,0xfdb97531eca86420] =3D> 1.uq[0xfdbb7577edabedef,0xfdbb7577edabedef] +#####psadbw xmm.ub[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] xmm.ub[16,15,= 14,13,12,11,10,9,8,7,6,5,4,3,2,1] =3D> 1.sw[64,0,0,0,64,0,0,0] +#####psadbw m128.ub[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] xmm.ub[16,15= ,14,13,12,11,10,9,8,7,6,5,4,3,2,1] =3D> 1.sw[64,0,0,0,64,0,0,0] +pshufd imm8[0x1b] xmm.sd[1122,3344,5566,7788] xmm.sd[0,0,0,0] =3D> 2.sd[= 7788,5566,3344,1122] +pshufd imm8[0x1b] m128.sd[1122,3344,5566,7788] xmm.sd[0,0,0,0] =3D> 2.sd= [7788,5566,3344,1122] +pshufhw imm8[0x1b] xmm.sw[11,22,33,44,55,66,77,88] xmm.sw[0,0,0,0,0,0,0,= 0] =3D> 2.sw[11,22,33,44,88,77,66,55] +pshufhw imm8[0x1b] m128.sw[11,22,33,44,55,66,77,88] xmm.sw[0,0,0,0,0,0,0= ,0] =3D> 2.sw[11,22,33,44,88,77,66,55] +pshuflw imm8[0x1b] xmm.sw[11,22,33,44,55,66,77,88] xmm.sw[0,0,0,0,0,0,0,= 0] =3D> 2.sw[44,33,22,11,55,66,77,88] +pshuflw imm8[0x1b] m128.sw[11,22,33,44,55,66,77,88] xmm.sw[0,0,0,0,0,0,0= ,0] =3D> 2.sw[44,33,22,11,55,66,77,88] +pslld imm8[4] xmm.ud[0x01234567,0x89abcdef,0x01234567,0x89abcdef] =3D> 1= .ud[0x12345670,0x9abcdef0,0x12345670,0x9abcdef0] +pslld xmm.uq[4,0] xmm.ud[0x01234567,0x89abcdef,0x01234567,0x89abcdef] =3D= > 1.ud[0x12345670,0x9abcdef0,0x12345670,0x9abcdef0] +pslld m128.uq[4,0] xmm.ud[0x01234567,0x89abcdef,0x01234567,0x89abcdef] =3D= > 1.ud[0x12345670,0x9abcdef0,0x12345670,0x9abcdef0] +pslldq imm8[0] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0x8899aabbccddeeff,0x0011223344556677] +pslldq imm8[1] 
xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0x99aabbccddeeff00,0x1122334455667788] +pslldq imm8[2] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0xaabbccddeeff0000,0x2233445566778899] +pslldq imm8[3] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0xbbccddeeff000000,0x33445566778899aa] +pslldq imm8[4] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0xccddeeff00000000,0x445566778899aabb] +pslldq imm8[5] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0xddeeff0000000000,0x5566778899aabbcc] +pslldq imm8[6] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0xeeff000000000000,0x66778899aabbccdd] +pslldq imm8[7] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0xff00000000000000,0x778899aabbccddee] +pslldq imm8[8] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0x0000000000000000,0x8899aabbccddeeff] +pslldq imm8[9] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0x0000000000000000,0x99aabbccddeeff00] +pslldq imm8[10] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0x0000000000000000,0xaabbccddeeff0000] +pslldq imm8[11] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0x0000000000000000,0xbbccddeeff000000] +pslldq imm8[12] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0x0000000000000000,0xccddeeff00000000] +pslldq imm8[13] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0x0000000000000000,0xddeeff0000000000] +pslldq imm8[14] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0x0000000000000000,0xeeff000000000000] +pslldq imm8[15] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0x0000000000000000,0xff00000000000000] +pslldq imm8[16] xmm.uq[0x8899aabbccddeeff,0x0011223344556677] =3D> 1.uq[= 0x0000000000000000,0x0000000000000000] +psllq imm8[4] xmm.uq[0x0123456789abcdef,0x0123456789abcdef] =3D> 1.uq[0x= 123456789abcdef0,0x123456789abcdef0] +psllq xmm.uq[4,0] xmm.uq[0x0123456789abcdef,0x0123456789abcdef] =3D> 1.u= 
q[0x123456789abcdef0,0x123456789abcdef0] +psllq m128.uq[4,0] xmm.uq[0x0123456789abcdef,0x0123456789abcdef] =3D> 1.= uq[0x123456789abcdef0,0x123456789abcdef0] +psllw imm8[4] xmm.uw[0x0123,0x4567,0x89ab,0xcdef,0x0123,0x4567,0x89ab,0x= cdef] =3D> 1.uw[0x1230,0x5670,0x9ab0,0xdef0,0x1230,0x5670,0x9ab0,0xdef0] +psllw xmm.uq[4,0] xmm.uw[0x0123,0x4567,0x89ab,0xcdef,0x0123,0x4567,0x89a= b,0xcdef] =3D> 1.uw[0x1230,0x5670,0x9ab0,0xdef0,0x1230,0x5670,0x9ab0,0xde= f0] +psllw m128.uq[4,0] xmm.uw[0x0123,0x4567,0x89ab,0xcdef,0x0123,0x4567,0x89= ab,0xcdef] =3D> 1.uw[0x1230,0x5670,0x9ab0,0xdef0,0x1230,0x5670,0x9ab0,0xd= ef0] +psrad imm8[4] xmm.ud[0x01234567,0x89abcdef,0x01234567,0x89abcdef] =3D> 1= .ud[0x00123456,0xf89abcde,0x00123456,0xf89abcde] +psrad xmm.uq[4,0] xmm.ud[0x01234567,0x89abcdef,0x01234567,0x89abcdef] =3D= > 1.ud[0x00123456,0xf89abcde,0x00123456,0xf89abcde] +psrad m128.uq[4,0] xmm.ud[0x01234567,0x89abcdef,0x01234567,0x89abcdef] =3D= > 1.ud[0x00123456,0xf89abcde,0x00123456,0xf89abcde] +psraw imm8[4] xmm.uw[0x0123,0x4567,0x89ab,0xcdef,0x0123,0x4567,0x89ab,0x= cdef] =3D> 1.uw[0x0012,0x0456,0xf89a,0xfcde,0x0012,0x0456,0xf89a,0xfcde] +psraw xmm.uq[4,0] xmm.uw[0x0123,0x4567,0x89ab,0xcdef,0x0123,0x4567,0x89a= b,0xcdef] =3D> 1.uw[0x0012,0x0456,0xf89a,0xfcde,0x0012,0x0456,0xf89a,0xfc= de] +psraw m128.uq[4,0] xmm.uw[0x0123,0x4567,0x89ab,0xcdef,0x0123,0x4567,0x89= ab,0xcdef] =3D> 1.uw[0x0012,0x0456,0xf89a,0xfcde,0x0012,0x0456,0xf89a,0xf= cde] +psrld imm8[4] xmm.ud[0x01234567,0x89abcdef,0x01234567,0x89abcdef] =3D> 1= .ud[0x00123456,0x089abcde,0x00123456,0x089abcde] +psrld xmm.uq[4,0] xmm.ud[0x01234567,0x89abcdef,0x01234567,0x89abcdef] =3D= > 1.ud[0x00123456,0x089abcde,0x00123456,0x089abcde] +psrld m128.uq[4,0] xmm.ud[0x01234567,0x89abcdef,0x01234567,0x89abcdef] =3D= > 1.ud[0x00123456,0x089abcde,0x00123456,0x089abcde] +psrldq imm8[0] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x8899aabbccddeeff,0x9911223344556677] +psrldq imm8[1] 
xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x778899aabbccddee,0x0099112233445566] +psrldq imm8[2] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x66778899aabbccdd,0x0000991122334455] +psrldq imm8[3] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x5566778899aabbcc,0x0000009911223344] +psrldq imm8[4] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x445566778899aabb,0x0000000099112233] +psrldq imm8[5] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x33445566778899aa,0x0000000000991122] +psrldq imm8[6] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x2233445566778899,0x0000000000009911] +psrldq imm8[7] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x1122334455667788,0x0000000000000099] +psrldq imm8[8] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x9911223344556677,0x0000000000000000] +psrldq imm8[9] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x0099112233445566,0x0000000000000000] +psrldq imm8[10] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x0000991122334455,0x0000000000000000] +psrldq imm8[11] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x0000009911223344,0x0000000000000000] +psrldq imm8[12] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x0000000099112233,0x0000000000000000] +psrldq imm8[13] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x0000000000991122,0x0000000000000000] +psrldq imm8[14] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x0000000000009911,0x0000000000000000] +psrldq imm8[15] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x0000000000000099,0x0000000000000000] +psrldq imm8[16] xmm.uq[0x8899aabbccddeeff,0x9911223344556677] =3D> 1.uq[= 0x0000000000000000,0x0000000000000000] +psrlq imm8[4] xmm.uq[0x0123456789abcdef,0x0123456789abcdef] =3D> 1.uq[0x= 00123456789abcde,0x00123456789abcde] +psrlq xmm.uq[4,0] xmm.uq[0x0123456789abcdef,0x0123456789abcdef] =3D> 1.u= 
q[0x00123456789abcde,0x00123456789abcde] +psrlq m128.uq[4,0] xmm.uq[0x0123456789abcdef,0x0123456789abcdef] =3D> 1.= uq[0x00123456789abcde,0x00123456789abcde] +psrlw imm8[4] xmm.uw[0x0123,0x4567,0x89ab,0xcdef,0x0123,0x4567,0x89ab,0x= cdef] =3D> 1.uw[0x0012,0x0456,0x089a,0x0cde,0x0012,0x0456,0x089a,0x0cde] +psrlw xmm.uq[4,0] xmm.uw[0x0123,0x4567,0x89ab,0xcdef,0x0123,0x4567,0x89a= b,0xcdef] =3D> 1.uw[0x0012,0x0456,0x089a,0x0cde,0x0012,0x0456,0x089a,0x0c= de] +psrlw m128.uq[4,0] xmm.uw[0x0123,0x4567,0x89ab,0xcdef,0x0123,0x4567,0x89= ab,0xcdef] =3D> 1.uw[0x0012,0x0456,0x089a,0x0cde,0x0012,0x0456,0x089a,0x0= cde] +psubb xmm.sb[8,7,6,5,4,3,2,1,8,7,6,5,4,3,2,1] xmm.sb[12,34,56,78,21,43,6= 5,87,12,34,56,78,21,43,65,87] =3D> 1.sb[4,27,50,73,17,40,63,86,4,27,50,73= ,17,40,63,86] +psubb m128.sb[8,7,6,5,4,3,2,1,8,7,6,5,4,3,2,1] xmm.sb[12,34,56,78,21,43,= 65,87,12,34,56,78,21,43,65,87] =3D> 1.sb[4,27,50,73,17,40,63,86,4,27,50,7= 3,17,40,63,86] +psubd xmm.sd[8765,4321,8765,4321] xmm.sd[12345678,87654321,12345678,8765= 4321] =3D> 1.sd[12336913,87650000,12336913,87650000] +psubd m128.sd[8765,4321,8765,4321] xmm.sd[12345678,87654321,12345678,876= 54321] =3D> 1.sd[12336913,87650000,12336913,87650000] +psubq mm.sq[11111111] mm.sq[33333333] =3D> 1.sq[22222222] +psubq m64.sq[11111111] mm.sq[33333333] =3D> 1.sq[22222222] +psubq xmm.sq[11111111,22222222] xmm.sq[55555555,33333333] =3D> 1.sq[4444= 4444,11111111] +psubq m128.sq[11111111,22222222] xmm.sq[55555555,33333333] =3D> 1.sq[444= 44444,11111111] +psubsb xmm.sb[-50,50,-40,40,-30,30,-20,20,-50,50,-40,40,-30,30,-20,20] x= mm.sb[25,-25,50,-50,100,-100,125,-125,25,-25,50,-50,100,-100,125,-125] =3D= > 1.sb[75,-75,90,-90,127,-128,127,-128,75,-75,90,-90,127,-128,127,-128] +psubsb m128.sb[-50,50,-40,40,-30,30,-20,20,-50,50,-40,40,-30,30,-20,20] = xmm.sb[25,-25,50,-50,100,-100,125,-125,25,-25,50,-50,100,-100,125,-125] =3D= > 1.sb[75,-75,90,-90,127,-128,127,-128,75,-75,90,-90,127,-128,127,-128] +psubsw 
xmm.sw[-32145,32145,12345,-12345,-32145,32145,12345,-12345] xmm.s= w[12345,-12345,32145,-32145,12345,-12345,32145,-32145] =3D> 1.sw[32767,-3= 2768,19800,-19800,32767,-32768,19800,-19800] +psubsw m128.sw[-32145,32145,12345,-12345,-32145,32145,12345,-12345] xmm.= sw[12345,-12345,32145,-32145,12345,-12345,32145,-32145] =3D> 1.sw[32767,-= 32768,19800,-19800,32767,-32768,19800,-19800] +psubusb xmm.ub[11,22,33,44,55,66,77,88,11,22,33,44,55,66,77,88] xmm.ub[8= 8,77,66,55,44,33,22,11,88,77,66,55,44,33,22,11] =3D> 1.ub[77,55,33,11,0,0= ,0,0,77,55,33,11,0,0,0,0] +psubusb m128.ub[11,22,33,44,55,66,77,88,11,22,33,44,55,66,77,88] xmm.ub[= 88,77,66,55,44,33,22,11,88,77,66,55,44,33,22,11] =3D> 1.ub[77,55,33,11,0,= 0,0,0,77,55,33,11,0,0,0,0] +psubusw xmm.uw[1122,3344,5566,7788,1122,3344,5566,7788] xmm.uw[8877,6655= ,4433,2211,8877,6655,4433,2211] =3D> 1.uw[7755,3311,0,0,7755,3311,0,0] +psubusw m128.uw[1122,3344,5566,7788,1122,3344,5566,7788] xmm.uw[8877,665= 5,4433,2211,8877,6655,4433,2211] =3D> 1.uw[7755,3311,0,0,7755,3311,0,0] +psubw xmm.sw[87,65,43,21,87,65,43,21] xmm.sw[1234,5678,4321,8765,1234,56= 78,4321,8765] =3D> 1.sw[1147,5613,4278,8744,1147,5613,4278,8744] +psubw m128.sw[87,65,43,21,87,65,43,21] xmm.sw[1234,5678,4321,8765,1234,5= 678,4321,8765] =3D> 1.sw[1147,5613,4278,8744,1147,5613,4278,8744] +punpckhbw xmm.ub[12,34,56,78,21,43,65,87,78,56,34,12,87,65,43,21] xmm.ub= [11,22,33,44,55,66,77,88,88,77,66,55,44,33,22,11] =3D> 1.ub[88,78,77,56,6= 6,34,55,12,44,87,33,65,22,43,11,21] +punpckhbw m128.ub[12,34,56,78,21,43,65,87,78,56,34,12,87,65,43,21] xmm.u= b[11,22,33,44,55,66,77,88,88,77,66,55,44,33,22,11] =3D> 1.ub[88,78,77,56,= 66,34,55,12,44,87,33,65,22,43,11,21] +punpckhdq xmm.ud[12345678,21436587,78563412,87654321] xmm.ud[11223344,55= 667788,88776655,44332211] =3D> 1.ud[88776655,78563412,44332211,87654321] +punpckhdq m128.ud[12345678,21436587,78563412,87654321] xmm.ud[11223344,5= 5667788,88776655,44332211] =3D> 1.ud[88776655,78563412,44332211,87654321] +punpckhqdq 
xmm.uq[1234567821436587,7856341287654321] xmm.uq[112233445566= 7788,8877665544332211] =3D> 1.uq[8877665544332211,7856341287654321] +punpckhqdq m128.uq[1234567821436587,7856341287654321] xmm.uq[11223344556= 67788,8877665544332211] =3D> 1.uq[8877665544332211,7856341287654321] +punpckhwd xmm.uw[1234,5678,2143,6587,7856,3412,8765,4321] xmm.uw[1122,33= 44,5566,7788,8877,6655,4433,2211] =3D> 1.uw[8877,7856,6655,3412,4433,8765= ,2211,4321] +punpckhwd m128.uw[1234,5678,2143,6587,7856,3412,8765,4321] xmm.uw[1122,3= 344,5566,7788,8877,6655,4433,2211] =3D> 1.uw[8877,7856,6655,3412,4433,876= 5,2211,4321] +punpcklbw xmm.ub[12,34,56,78,21,43,65,87,78,56,34,12,87,65,43,21] xmm.ub= [11,22,33,44,55,66,77,88,88,77,66,55,44,33,22,11] =3D> 1.ub[11,12,22,34,3= 3,56,44,78,55,21,66,43,77,65,88,87] +punpcklbw m128.ub[12,34,56,78,21,43,65,87,78,56,34,12,87,65,43,21] xmm.u= b[11,22,33,44,55,66,77,88,88,77,66,55,44,33,22,11] =3D> 1.ub[11,12,22,34,= 33,56,44,78,55,21,66,43,77,65,88,87] +punpckldq xmm.ud[12345678,21436587,78563412,87654321] xmm.ud[11223344,55= 667788,88776655,44332211] =3D> 1.ud[11223344,12345678,55667788,21436587] +punpckldq m128.ud[12345678,21436587,78563412,87654321] xmm.ud[11223344,5= 5667788,88776655,44332211] =3D> 1.ud[11223344,12345678,55667788,21436587] +punpcklqdq xmm.uq[1234567821436587,7856341287654321] xmm.uq[112233445566= 7788,8877665544332211] =3D> 1.uq[1122334455667788,1234567821436587] +punpcklqdq m128.uq[1234567821436587,7856341287654321] xmm.uq[11223344556= 67788,8877665544332211] =3D> 1.uq[1122334455667788,1234567821436587] +punpcklwd xmm.uw[1234,5678,2143,6587,7856,3412,8765,4321] xmm.uw[1122,33= 44,5566,7788,8877,6655,4433,2211] =3D> 1.uw[1122,1234,3344,5678,5566,2143= ,7788,6587] +punpcklwd m128.uw[1234,5678,2143,6587,7856,3412,8765,4321] xmm.uw[1122,3= 344,5566,7788,8877,6655,4433,2211] =3D> 1.uw[1122,1234,3344,5678,5566,214= 3,7788,6587] +pxor xmm.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789ab= cdef,0xfdb97531eca86420] =3D> 
1.uq[0xfc9a30566503a9cf,0xfc9a30566503a9cf] +pxor m128.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789a= bcdef,0xfdb97531eca86420] =3D> 1.uq[0xfc9a30566503a9cf,0xfc9a30566503a9cf= ] +shufpd imm8[0x0] xmm.pd[1234.5678,8765.4321] xmm.pd[1234.5678,8765.4321]= =3D> 2.pd[1234.5678,1234.5678] +shufpd imm8[0x3] m128.pd[1234.5678,8765.4321] xmm.pd[1234.5678,8765.4321= ] =3D> 2.pd[8765.4321,8765.4321] +sqrtpd xmm.pd[36.0,49.0] xmm.pd[1.11,2.22] =3D> 1.pd[6.0,7.0] +sqrtpd m128.pd[36.0,49.0] xmm.pd[1.11,2.22] =3D> 1.pd[6.0,7.0] +sqrtsd xmm.pd[36.0,5.55] xmm.pd[1.11,2.22] =3D> 1.pd[6.0,2.22] +sqrtsd m128.pd[36.0,5.55] xmm.pd[1.11,2.22] =3D> 1.pd[6.0,2.22] +subpd xmm.pd[1234.5678,8765.4321] xmm.pd[2222.0,1111.0] =3D> 1.pd[987.43= 22,-7654.4321] +subpd m128.pd[1234.5678,8765.4321] xmm.pd[2222.0,1111.0] =3D> 1.pd[987.4= 322,-7654.4321] +subsd xmm.pd[1234.5678,8765.4321] xmm.pd[2222.0,1111.0] =3D> 1.pd[987.43= 22,1111.0] +subsd m128.pd[1234.5678,8765.4321] xmm.pd[2222.0,1111.0] =3D> 1.pd[987.4= 322,1111.0] +ucomisd xmm.pd[1234.5678,0.0] xmm.pd[1234.5679,0.0] =3D> eflags[0x8d5,0x= 000] +ucomisd xmm.pd[1234.5678,0.0] xmm.pd[1234.5677,0.0] =3D> eflags[0x8d5,0x= 001] +ucomisd xmm.pd[1234.5678,0.0] xmm.pd[1234.5678,0.0] =3D> eflags[0x8d5,0x= 040] +ucomisd m64.pd[1234.5678] xmm.pd[1234.5679,0.0] =3D> eflags[0x8d5,0x000] +ucomisd m64.pd[1234.5678] xmm.pd[1234.5677,0.0] =3D> eflags[0x8d5,0x001] +ucomisd m64.pd[1234.5678] xmm.pd[1234.5678,0.0] =3D> eflags[0x8d5,0x040] +unpckhpd xmm.pd[1234.5678,8765.4321] xmm.pd[1122.3344,5566.7788] =3D> 1.= pd[5566.7788,8765.4321] +unpckhpd m128.pd[1234.5678,8765.4321] xmm.pd[1122.3344,5566.7788] =3D> 1= .pd[5566.7788,8765.4321] +unpcklpd xmm.pd[1234.5678,8765.4321] xmm.pd[1122.3344,5566.7788] =3D> 1.= pd[1122.3344,1234.5678] +unpcklpd m128.pd[1234.5678,8765.4321] xmm.pd[1122.3344,5566.7788] =3D> 1= .pd[1122.3344,1234.5678] +xorpd xmm.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789a= bcdef,0xfdb97531eca86420] =3D> 
1.uq[0xfc9a30566503a9cf,0xfc9a30566503a9cf= ] +xorpd m128.uq[0xfdb97531eca86420,0x0123456789abcdef] xmm.uq[0x0123456789= abcdef,0xfdb97531eca86420] =3D> 1.uq[0xfc9a30566503a9cf,0xfc9a30566503a9c= f] |
|
From: <sv...@va...> - 2005-05-11 00:03:23
|
Author: sewardj
Date: 2005-05-11 01:03:06 +0100 (Wed, 11 May 2005)
New Revision: 1179
Modified:
trunk/priv/guest-amd64/ghelpers.c
trunk/priv/guest-amd64/toIR.c
trunk/priv/host-amd64/hdefs.c
trunk/priv/host-amd64/hdefs.h
trunk/priv/host-amd64/isel.c
Log:
SSE2, on and on and on. There are more different SSE2 instructions
than there are atoms in the universe. This much, at least, I now
know.
Modified: trunk/priv/guest-amd64/ghelpers.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/ghelpers.c 2005-05-10 22:42:54 UTC (rev 1178)
+++ trunk/priv/guest-amd64/ghelpers.c 2005-05-11 00:03:06 UTC (rev 1179)
@@ -1488,7 +1488,15 @@
return (ULong)t;
}
=20
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
+{
+ ULong rHi8 =3D amd64g_calculate_mmx_pmovmskb ( w64hi );
+ ULong rLo8 =3D amd64g_calculate_mmx_pmovmskb ( w64lo );
+ return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
+}
=20
+
/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing, ---*/
/*--- guest state as a whole. ---*/
Modified: trunk/priv/guest-amd64/toIR.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/toIR.c 2005-05-10 22:42:54 UTC (rev 1178)
+++ trunk/priv/guest-amd64/toIR.c 2005-05-11 00:03:06 UTC (rev 1179)
@@ -7665,80 +7665,82 @@
}
=20
=20
-//.. /* Vector by scalar shift of G by the amount specified at the botto=
m
-//.. of E. */
-//..=20
-//.. static UInt dis_SSE_shiftG_byE ( UChar sorb, ULong delta,=20
-//.. HChar* opname, IROp op )
-//.. {
-//.. HChar dis_buf[50];
-//.. Int alen, size;
-//.. IRTemp addr;
-//.. Bool shl, shr, sar;
-//.. UChar rm =3D getUChar(delta);
-//.. IRTemp g0 =3D newTemp(Ity_V128);
-//.. IRTemp g1 =3D newTemp(Ity_V128);
-//.. IRTemp amt =3D newTemp(Ity_I32);
-//.. IRTemp amt8 =3D newTemp(Ity_I8);
-//.. if (epartIsReg(rm)) {
-//.. assign( amt, getXMMRegLane32(eregOfRM(rm), 0) );
-//.. DIP("%s %s,%s\n", opname,
-//.. nameXMMReg(eregOfRM(rm)),
-//.. nameXMMReg(gregOfRM(rm)) );
-//.. delta++;
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta, dis_buf );
-//.. assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
-//.. DIP("%s %s,%s\n", opname,
-//.. dis_buf,
-//.. nameXMMReg(gregOfRM(rm)) );
-//.. delta +=3D alen;
-//.. }
-//.. assign( g0, getXMMReg(gregOfRM(rm)) );
-//.. assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
-//..=20
-//.. shl =3D shr =3D sar =3D False;
-//.. size =3D 0;
-//.. switch (op) {
-//.. case Iop_ShlN16x8: shl =3D True; size =3D 32; break;
-//.. case Iop_ShlN32x4: shl =3D True; size =3D 32; break;
-//.. case Iop_ShlN64x2: shl =3D True; size =3D 64; break;
-//.. case Iop_SarN16x8: sar =3D True; size =3D 16; break;
-//.. case Iop_SarN32x4: sar =3D True; size =3D 32; break;
-//.. case Iop_ShrN16x8: shr =3D True; size =3D 16; break;
-//.. case Iop_ShrN32x4: shr =3D True; size =3D 32; break;
-//.. case Iop_ShrN64x2: shr =3D True; size =3D 64; break;
-//.. default: vassert(0);
-//.. }
-//..=20
-//.. if (shl || shr) {
-//.. assign(=20
-//.. g1,
-//.. IRExpr_Mux0X(
-//.. unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size=
))),
-//.. mkV128(0x0000),
-//.. binop(op, mkexpr(g0), mkexpr(amt8))
-//.. )
-//.. );
-//.. } else=20
-//.. if (sar) {
-//.. assign(=20
-//.. g1,
-//.. IRExpr_Mux0X(
-//.. unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size=
))),
-//.. binop(op, mkexpr(g0), mkU8(size-1)),
-//.. binop(op, mkexpr(g0), mkexpr(amt8))
-//.. )
-//.. );
-//.. } else {
-//.. vassert(0);
-//.. }
-//..=20
-//.. putXMMReg( gregOfRM(rm), mkexpr(g1) );
-//.. return delta;
-//.. }
+/* Vector by scalar shift of G by the amount specified at the bottom
+ of E. */
=20
+static ULong dis_SSE_shiftG_byE ( Prefix pfx, ULong delta,=20
+ HChar* opname, IROp op )
+{
+ HChar dis_buf[50];
+ Int alen, size;
+ IRTemp addr;
+ Bool shl, shr, sar;
+ UChar rm =3D getUChar(delta);
+ IRTemp g0 =3D newTemp(Ity_V128);
+ IRTemp g1 =3D newTemp(Ity_V128);
+ IRTemp amt =3D newTemp(Ity_I32);
+ IRTemp amt8 =3D newTemp(Ity_I8);
+ if (epartIsReg(rm)) {
+ assign( amt, getXMMRegLane32(eregOfRexRM(pfx,rm), 0) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ delta++;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta, dis_buf, 0 );
+ assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ delta +=3D alen;
+ }
+ assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
+ assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
=20
+ shl =3D shr =3D sar =3D False;
+ size =3D 0;
+ switch (op) {
+ case Iop_ShlN16x8: shl =3D True; size =3D 32; break;
+ case Iop_ShlN32x4: shl =3D True; size =3D 32; break;
+ case Iop_ShlN64x2: shl =3D True; size =3D 64; break;
+ case Iop_SarN16x8: sar =3D True; size =3D 16; break;
+ case Iop_SarN32x4: sar =3D True; size =3D 32; break;
+ case Iop_ShrN16x8: shr =3D True; size =3D 16; break;
+ case Iop_ShrN32x4: shr =3D True; size =3D 32; break;
+ case Iop_ShrN64x2: shr =3D True; size =3D 64; break;
+ default: vassert(0);
+ }
+
+ if (shl || shr) {
+ assign(=20
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)), mkU64=
(size))),
+ mkV128(0x0000),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else=20
+ if (sar) {
+ assign(=20
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)), mkU64=
(size))),
+ binop(op, mkexpr(g0), mkU8(size-1)),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else {
+ vassert(0);
+ }
+
+ putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
+ return delta;
+}
+
+
/* Vector by scalar shift of E by an immediate byte. */
=20
static=20
@@ -10756,154 +10758,164 @@
goto decode_success;
}
=20
-//.. /* 66 0F EE =3D PMAXSW -- 16x8 signed max */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEE) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pmaxsw", Iop_Max16Sx8, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F DE =3D PMAXUB -- 8x16 unsigned max */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDE) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pmaxub", Iop_Max8Ux16, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F EA =3D PMINSW -- 16x8 signed min */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEA) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pminsw", Iop_Min16Sx8, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F DA =3D PMINUB -- 8x16 unsigned min */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDA) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pminub", Iop_Min8Ux16, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F D7 =3D PMOVMSKB -- extract sign bits from each of 16 la=
nes in
-//.. xmm(G), turn them into a byte, and put zero-extend of it in
-//.. ireg(G). Doing this directly is just too cumbersome; give up
-//.. therefore and call a helper. */
-//.. /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )=
; */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD7) {
-//.. modrm =3D insn[2];
-//.. if (epartIsReg(modrm)) {
-//.. t0 =3D newTemp(Ity_I64);
-//.. t1 =3D newTemp(Ity_I64);
-//.. assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
-//.. assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
-//.. t5 =3D newTemp(Ity_I32);
-//.. assign(t5, mkIRExprCCall(
-//.. Ity_I32, 0/*regparms*/,=20
-//.. "x86g_calculate_sse_pmovmskb",
-//.. &x86g_calculate_sse_pmovmskb,
-//.. mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
-//.. putIReg(4, gregOfRM(modrm), mkexpr(t5));
-//.. DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
-//.. nameIReg(4,gregOfRM(modrm)));
-//.. delta +=3D 3;
-//.. goto decode_success;
-//.. }=20
-//.. /* else fall through */
-//.. }
-//..=20
-//.. /* 66 0F E4 =3D PMULHUW -- 16x8 hi-half of unsigned widening mul=
tiply */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE4) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pmulhuw", Iop_MulHi16Ux8, False )=
;
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F E5 =3D PMULHW -- 16x8 hi-half of signed widening multip=
ly */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE5) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pmulhw", Iop_MulHi16Sx8, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F D5 =3D PMULHL -- 16x8 multiply */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD5) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "pmullw", Iop_Mul16x8, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
-//.. /* 0F F4 =3D PMULUDQ -- unsigned widening multiply of 32-lanes 0=
x
-//.. 0 to form 64-bit result */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF4) {
-//.. IRTemp sV =3D newTemp(Ity_I64);
-//.. IRTemp dV =3D newTemp(Ity_I64);
-//.. t1 =3D newTemp(Ity_I32);
-//.. t0 =3D newTemp(Ity_I32);
-//.. modrm =3D insn[2];
-//..=20
-//.. do_MMX_preamble();
-//.. assign( dV, getMMXReg(gregOfRM(modrm)) );
-//..=20
-//.. if (epartIsReg(modrm)) {
-//.. assign( sV, getMMXReg(eregOfRM(modrm)) );
-//.. delta +=3D 2+1;
-//.. DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
-//.. nameMMXReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
-//.. delta +=3D 2+alen;
-//.. DIP("pmuludq %s,%s\n", dis_buf,
-//.. nameMMXReg(gregOfRM(modrm)));
-//.. }
-//..=20
-//.. assign( t0, unop(Iop_64to32, mkexpr(dV)) );
-//.. assign( t1, unop(Iop_64to32, mkexpr(sV)) );
-//.. putMMXReg( gregOfRM(modrm),
-//.. binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F F4 =3D PMULUDQ -- unsigned widening multiply of 32-lane=
s 0 x
-//.. 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-=
bit
-//.. half */
-//.. /* This is a really poor translation -- could be improved if
-//.. performance critical */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF4) {
-//.. IRTemp sV, dV;
-//.. IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
-//.. sV =3D newTemp(Ity_V128);
-//.. dV =3D newTemp(Ity_V128);
-//.. s3 =3D s2 =3D s1 =3D s0 =3D d3 =3D d2 =3D d1 =3D d0 =3D IRTem=
p_INVALID;
-//.. t1 =3D newTemp(Ity_I64);
-//.. t0 =3D newTemp(Ity_I64);
-//.. modrm =3D insn[2];
-//.. assign( dV, getXMMReg(gregOfRM(modrm)) );
-//..=20
-//.. if (epartIsReg(modrm)) {
-//.. assign( sV, getXMMReg(eregOfRM(modrm)) );
-//.. delta +=3D 2+1;
-//.. DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. delta +=3D 2+alen;
-//.. DIP("pmuludq %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. }
-//..=20
-//.. breakup128to32s( dV, &d3, &d2, &d1, &d0 );
-//.. breakup128to32s( sV, &s3, &s2, &s1, &s0 );
-//..=20
-//.. assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
-//.. putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) );
-//.. assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
-//.. putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) );
-//.. goto decode_success;
-//.. }
+ /* 66 0F EE =3D PMAXSW -- 16x8 signed max */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEE) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pmaxsw", Iop_Max16Sx8, False );
+ goto decode_success;
+ }
=20
+ /* 66 0F DE =3D PMAXUB -- 8x16 unsigned max */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDE) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pmaxub", Iop_Max8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F EA =3D PMINSW -- 16x8 signed min */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEA) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pminsw", Iop_Min16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DA =3D PMINUB -- 8x16 unsigned min */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDA) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pminub", Iop_Min8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F D7 =3D PMOVMSKB -- extract sign bits from each of 16 lanes i=
n
+ xmm(E), turn them into a byte, and put zero-extend of it in
+ ireg(G). Doing this directly is just too cumbersome; give up
+ therefore and call a helper. */
+ /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD7) {
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ t0 =3D newTemp(Ity_I64);
+ t1 =3D newTemp(Ity_I64);
+ assign(t0, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0));
+ assign(t1, getXMMRegLane64(eregOfRexRM(pfx,modrm), 1));
+ t5 =3D newTemp(Ity_I64);
+ assign(t5, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,=20
+ "amd64g_calculate_sse_pmovmskb",
+ &amd64g_calculate_sse_pmovmskb,
+ mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
+ putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t5)));
+ DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameIReg32(gregOfRexRM(pfx,modrm)));
+ delta +=3D 3;
+ goto decode_success;
+ }=20
+ /* else fall through */
+ }
+
+ /* 66 0F E4 =3D PMULHUW -- 16x8 hi-half of unsigned widening multiply=
*/
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE4) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pmulhuw", Iop_MulHi16Ux8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F E5 =3D PMULHW -- 16x8 hi-half of signed widening multiply */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE5) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pmulhw", Iop_MulHi16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F D5 =3D PMULHL -- 16x8 multiply */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD5) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "pmullw", Iop_Mul16x8, False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
+ /* 0F F4 =3D PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
+ 0 to form 64-bit result */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF4) {
+ IRTemp sV =3D newTemp(Ity_I64);
+ IRTemp dV =3D newTemp(Ity_I64);
+ t1 =3D newTemp(Ity_I32);
+ t0 =3D newTemp(Ity_I32);
+ modrm =3D insn[2];
+
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta +=3D 2+1;
+ DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta +=3D 2+alen;
+ DIP("pmuludq %s,%s\n", dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ assign( t0, unop(Iop_64to32, mkexpr(dV)) );
+ assign( t1, unop(Iop_64to32, mkexpr(sV)) );
+ putMMXReg( gregLO3ofRM(modrm),
+ binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
+ goto decode_success;
+ }
+
+ /* 66 0F F4 =3D PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
+ 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
+ half */
+ /* This is a really poor translation -- could be improved if
+ performance critical */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF4) {
+ IRTemp sV, dV;
+ IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
+ sV =3D newTemp(Ity_V128);
+ dV =3D newTemp(Ity_V128);
+ s3 =3D s2 =3D s1 =3D s0 =3D d3 =3D d2 =3D d1 =3D d0 =3D IRTemp_INV=
ALID;
+ t1 =3D newTemp(Ity_I64);
+ t0 =3D newTemp(Ity_I64);
+ modrm =3D insn[2];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta +=3D 2+1;
+ DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta +=3D 2+alen;
+ DIP("pmuludq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ breakup128to32s( dV, &d3, &d2, &d1, &d0 );
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+
+ assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
+ putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, mkexpr(t0) );
+ assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
+ putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkexpr(t1) );
+ goto decode_success;
+ }
+
/* 66 0F EB =3D POR */
if (have66noF2noF3(pfx) && sz =3D=3D 2=20
&& insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEB) {
@@ -10911,151 +10923,153 @@
goto decode_success;
}
=20
-//.. /* 66 0F 70 =3D PSHUFD -- rearrange 4x32 from E(xmm or mem) to G=
(xmm) */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x70) {
-//.. Int order;
-//.. IRTemp sV, dV, s3, s2, s1, s0;
-//.. s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INVALID;
-//.. sV =3D newTemp(Ity_V128);
-//.. dV =3D newTemp(Ity_V128);
-//.. modrm =3D insn[2];
-//.. if (epartIsReg(modrm)) {
-//.. assign( sV, getXMMReg(eregOfRM(modrm)) );
-//.. order =3D (Int)insn[3];
-//.. delta +=3D 2+2;
-//.. DIP("pshufd $%d,%s,%s\n", order,=20
-//.. nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. order =3D (Int)insn[2+alen];
-//.. delta +=3D 3+alen;
-//.. DIP("pshufd $%d,%s,%s\n", order,=20
-//.. dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. }
-//.. breakup128to32s( sV, &s3, &s2, &s1, &s0 );
-//..=20
-#if 0 /* stop gcc multi-line comment warning */
-/.. # define SEL(n) \
-/.. ((n)=3D=3D0 ? s0 : ((n)=3D=3D1 ? s1 : ((n)=3D=3D2 ? =
s2 : s3)))
-#endif /* stop gcc multi-line comment warning */
-//.. assign(dV,
-//.. mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
-//.. SEL((order>>2)&3), SEL((order>>0)&3) )
-//.. );
-//.. putXMMReg(gregOfRM(modrm), mkexpr(dV));
-//.. # undef SEL
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* F3 0F 70 =3D PSHUFHW -- rearrange upper half 4x16 from E(xmm =
or
-//.. mem) to G(xmm), and copy lower half */
-//.. if (insn[0] =3D=3D 0xF3 && insn[1] =3D=3D 0x0F && insn[2] =3D=3D=
0x70) {
-//.. Int order;
-//.. IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
-//.. s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INVALID;
-//.. sV =3D newTemp(Ity_V128);
-//.. dV =3D newTemp(Ity_V128);
-//.. sVhi =3D newTemp(Ity_I64);
-//.. dVhi =3D newTemp(Ity_I64);
-//.. modrm =3D insn[3];
-//.. if (epartIsReg(modrm)) {
-//.. assign( sV, getXMMReg(eregOfRM(modrm)) );
-//.. order =3D (Int)insn[4];
-//.. delta +=3D 4+1;
-//.. DIP("pshufhw $%d,%s,%s\n", order,=20
-//.. nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+3, dis_buf );
-//.. assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. order =3D (Int)insn[3+alen];
-//.. delta +=3D 4+alen;
-//.. DIP("pshufhw $%d,%s,%s\n", order,=20
-//.. dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. }
-//.. assign( sVhi, unop(Iop_128HIto64, mkexpr(sV)) );
-//.. breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );
-//..=20
-#if 0 /* stop gcc multi-line comment warning */
-/.. # define SEL(n) \
-/.. ((n)=3D=3D0 ? s0 : ((n)=3D=3D1 ? s1 : ((n)=3D=3D2 ? =
s2 : s3)))
-#endif /* stop gcc multi-line comment warning */
-//.. assign(dVhi,
-//.. mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
-//.. SEL((order>>2)&3), SEL((order>>0)&3) )
-//.. );
-//.. assign(dV, binop( Iop_64HLto128,=20
-//.. mkexpr(dVhi),
-//.. unop(Iop_128to64, mkexpr(sV))) );
-//.. putXMMReg(gregOfRM(modrm), mkexpr(dV));
-//.. # undef SEL
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* F2 0F 70 =3D PSHUFLW -- rearrange lower half 4x16 from E(xmm =
or
-//.. mem) to G(xmm), and copy upper half */
-//.. if (insn[0] =3D=3D 0xF2 && insn[1] =3D=3D 0x0F && insn[2] =3D=3D=
0x70) {
-//.. Int order;
-//.. IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
-//.. s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INVALID;
-//.. sV =3D newTemp(Ity_V128);
-//.. dV =3D newTemp(Ity_V128);
-//.. sVlo =3D newTemp(Ity_I64);
-//.. dVlo =3D newTemp(Ity_I64);
-//.. modrm =3D insn[3];
-//.. if (epartIsReg(modrm)) {
-//.. assign( sV, getXMMReg(eregOfRM(modrm)) );
-//.. order =3D (Int)insn[4];
-//.. delta +=3D 4+1;
-//.. DIP("pshuflw $%d,%s,%s\n", order,=20
-//.. nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+3, dis_buf );
-//.. assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. order =3D (Int)insn[3+alen];
-//.. delta +=3D 4+alen;
-//.. DIP("pshuflw $%d,%s,%s\n", order,=20
-//.. dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. }
-//.. assign( sVlo, unop(Iop_128to64, mkexpr(sV)) );
-//.. breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );
-//..=20
-#if 0 /* stop gcc multi-line comment warning */
-/.. # define SEL(n) \
-/.. ((n)=3D=3D0 ? s0 : ((n)=3D=3D1 ? s1 : ((n)=3D=3D2 ? =
s2 : s3)))
-#endif /* stop gcc multi-line comment warning */
-//.. assign(dVlo,
-//.. mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
-//.. SEL((order>>2)&3), SEL((order>>0)&3) )
-//.. );
-//.. assign(dV, binop( Iop_64HLto128,
-//.. unop(Iop_128HIto64, mkexpr(sV)),
-//.. mkexpr(dVlo) ) );
-//.. putXMMReg(gregOfRM(modrm), mkexpr(dV));
-//.. # undef SEL
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 72 /6 ib =3D PSLLD by immediate */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x72
-//.. && epartIsReg(insn[2])
-//.. && gregOfRM(insn[2]) =3D=3D 6) {
-//.. delta =3D dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 =
);
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F F2 =3D PSLLD by E */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF2) {
-//.. delta =3D dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_Shl=
N32x4 );
-//.. goto decode_success;
-//.. }
+ /* 66 0F 70 =3D PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm)=
*/
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x70) {
+ Int order;
+ IRTemp sV, dV, s3, s2, s1, s0;
+ s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INVALID;
+ sV =3D newTemp(Ity_V128);
+ dV =3D newTemp(Ity_V128);
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ order =3D (Int)insn[3];
+ delta +=3D 3+1;
+ DIP("pshufd $%d,%s,%s\n", order,=20
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf,=20
+ 1/*byte after the amode*/ );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ order =3D (Int)insn[2+alen];
+ delta +=3D 2+alen+1;
+ DIP("pshufd $%d,%s,%s\n", order,=20
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
=20
+# define SEL(n) \
+ ((n)=3D=3D0 ? s0 : ((n)=3D=3D1 ? s1 : ((n)=3D=3D2 ? s2 :=
s3)))
+ assign(dV,
+ mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
+# undef SEL
+ goto decode_success;
+ }
+
+ /* F3 0F 70 =3D PSHUFHW -- rearrange upper half 4x16 from E(xmm or
+ mem) to G(xmm), and copy lower half */
+ if (haveF3no66noF2(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x70) {
+ Int order;
+ IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
+ s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INVALID;
+ sV =3D newTemp(Ity_V128);
+ dV =3D newTemp(Ity_V128);
+ sVhi =3D newTemp(Ity_I64);
+ dVhi =3D newTemp(Ity_I64);
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ order =3D (Int)insn[3];
+ delta +=3D 3+1;
+ DIP("pshufhw $%d,%s,%s\n", order,=20
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf,=20
+ 1/*byte after the amode*/ );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ order =3D (Int)insn[2+alen];
+ delta +=3D 2+alen+1;
+ DIP("pshufhw $%d,%s,%s\n", order,=20
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+ assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );
+
+# define SEL(n) \
+ ((n)=3D=3D0 ? s0 : ((n)=3D=3D1 ? s1 : ((n)=3D=3D2 ? s2 :=
s3)))
+ assign(dVhi,
+ mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ assign(dV, binop( Iop_64HLtoV128,=20
+ mkexpr(dVhi),
+ unop(Iop_V128to64, mkexpr(sV))) );
+ putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
+# undef SEL
+ goto decode_success;
+ }
+
+ /* F2 0F 70 =3D PSHUFLW -- rearrange lower half 4x16 from E(xmm or
+ mem) to G(xmm), and copy upper half */
+ if (haveF2no66noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x70) {
+ Int order;
+ IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
+ s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INVALID;
+ sV =3D newTemp(Ity_V128);
+ dV =3D newTemp(Ity_V128);
+ sVlo =3D newTemp(Ity_I64);
+ dVlo =3D newTemp(Ity_I64);
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ order =3D (Int)insn[3];
+ delta +=3D 3+1;
+ DIP("pshuflw $%d,%s,%s\n", order,=20
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf,=20
+ 1/*byte after the amode*/ );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ order =3D (Int)insn[2+alen];
+ delta +=3D 2+alen+1;
+ DIP("pshuflw $%d,%s,%s\n", order,=20
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+ assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
+ breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );
+
+# define SEL(n) \
+ ((n)=3D=3D0 ? s0 : ((n)=3D=3D1 ? s1 : ((n)=3D=3D2 ? s2 :=
s3)))
+ assign(dVlo,
+ mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ assign(dV, binop( Iop_64HLtoV128,
+ unop(Iop_V128HIto64, mkexpr(sV)),
+ mkexpr(dVlo) ) );
+ putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
+# undef SEL
+ goto decode_success;
+ }
+
+ /* 66 0F 72 /6 ib =3D PSLLD by immediate */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x72
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) =3D=3D 6) {
+ delta =3D dis_SSE_shiftE_imm( pfx, delta+2, "pslld", Iop_ShlN32x4 =
);
+ goto decode_success;
+ }
+
+ /* 66 0F F2 =3D PSLLD by E */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF2) {
+ delta =3D dis_SSE_shiftG_byE( pfx, delta+2, "pslld", Iop_ShlN32x4 =
);
+ goto decode_success;
+ }
+
/* 66 0F 73 /7 ib =3D PSLLDQ by immediate */
/* note, if mem case ever filled in, 1 byte after amode */
if (have66noF2noF3(pfx) && sz =3D=3D 2=20
@@ -11118,76 +11132,86 @@
goto decode_success;
}
=20
-//.. /* 66 0F 73 /6 ib =3D PSLLQ by immediate */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x73
-//.. && epartIsReg(insn[2])
-//.. && gregOfRM(insn[2]) =3D=3D 6) {
-//.. delta =3D dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 =
);
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F F3 =3D PSLLQ by E */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF3) {
-//.. delta =3D dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_Shl=
N64x2 );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 71 /6 ib =3D PSLLW by immediate */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x71
-//.. && epartIsReg(insn[2])
-//.. && gregOfRM(insn[2]) =3D=3D 6) {
-//.. delta =3D dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 =
);
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F F1 =3D PSLLW by E */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF1) {
-//.. delta =3D dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_Shl=
N16x8 );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 72 /4 ib =3D PSRAD by immediate */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x72
-//.. && epartIsReg(insn[2])
-//.. && gregOfRM(insn[2]) =3D=3D 4) {
-//.. delta =3D dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 =
);
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F E2 =3D PSRAD by E */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE2) {
-//.. delta =3D dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_Sar=
N32x4 );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 71 /4 ib =3D PSRAW by immediate */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x71
-//.. && epartIsReg(insn[2])
-//.. && gregOfRM(insn[2]) =3D=3D 4) {
-//.. delta =3D dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 =
);
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F E1 =3D PSRAW by E */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE1) {
-//.. delta =3D dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_Sar=
N16x8 );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 72 /2 ib =3D PSRLD by immediate */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x72
-//.. && epartIsReg(insn[2])
-//.. && gregOfRM(insn[2]) =3D=3D 2) {
-//.. delta =3D dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 =
);
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F D2 =3D PSRLD by E */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD2) {
-//.. delta =3D dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_Shr=
N32x4 );
-//.. goto decode_success;
-//.. }
+ /* 66 0F 73 /6 ib =3D PSLLQ by immediate */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x73
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) =3D=3D 6) {
+ delta =3D dis_SSE_shiftE_imm( pfx, delta+2, "psllq", Iop_ShlN64x2 =
);
+ goto decode_success;
+ }
=20
+ /* 66 0F F3 =3D PSLLQ by E */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF3) {
+ delta =3D dis_SSE_shiftG_byE( pfx, delta+2, "psllq", Iop_ShlN64x2 =
);
+ goto decode_success;
+ }
+
+ /* 66 0F 71 /6 ib =3D PSLLW by immediate */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x71
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) =3D=3D 6) {
+ delta =3D dis_SSE_shiftE_imm( pfx, delta+2, "psllw", Iop_ShlN16x8 =
);
+ goto decode_success;
+ }
+
+ /* 66 0F F1 =3D PSLLW by E */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF1) {
+ delta =3D dis_SSE_shiftG_byE( pfx, delta+2, "psllw", Iop_ShlN16x8 =
);
+ goto decode_success;
+ }
+
+ /* 66 0F 72 /4 ib =3D PSRAD by immediate */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x72
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) =3D=3D 4) {
+ delta =3D dis_SSE_shiftE_imm( pfx, delta+2, "psrad", Iop_SarN32x4 =
);
+ goto decode_success;
+ }
+
+ /* 66 0F E2 =3D PSRAD by E */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE2) {
+ delta =3D dis_SSE_shiftG_byE( pfx, delta+2, "psrad", Iop_SarN32x4 =
);
+ goto decode_success;
+ }
+
+ /* 66 0F 71 /4 ib =3D PSRAW by immediate */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x71
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) =3D=3D 4) {
+ delta =3D dis_SSE_shiftE_imm( pfx, delta+2, "psraw", Iop_SarN16x8 =
);
+ goto decode_success;
+ }
+
+ /* 66 0F E1 =3D PSRAW by E */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE1) {
+ delta =3D dis_SSE_shiftG_byE( pfx, delta+2, "psraw", Iop_SarN16x8 =
);
+ goto decode_success;
+ }
+
+ /* 66 0F 72 /2 ib =3D PSRLD by immediate */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x72
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) =3D=3D 2) {
+ delta =3D dis_SSE_shiftE_imm( pfx, delta+2, "psrld", Iop_ShrN32x4 =
);
+ goto decode_success;
+ }
+
+ /* 66 0F D2 =3D PSRLD by E */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD2) {
+ delta =3D dis_SSE_shiftG_byE( pfx, delta+2, "psrld", Iop_ShrN32x4 =
);
+ goto decode_success;
+ }
+
/* 66 0F 73 /3 ib =3D PSRLDQ by immediate */
/* note, if mem case ever filled in, 1 byte after amode */
if (have66noF2noF3(pfx) && sz =3D=3D 2=20
@@ -11260,26 +11284,29 @@
goto decode_success;
}
=20
-//.. /* 66 0F D3 =3D PSRLQ by E */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD3) {
-//.. delta =3D dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_Shr=
N64x2 );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 71 /2 ib =3D PSRLW by immediate */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x71
-//.. && epartIsReg(insn[2])
-//.. && gregOfRM(insn[2]) =3D=3D 2) {
-//.. delta =3D dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 =
);
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F D1 =3D PSRLW by E */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD1) {
-//.. delta =3D dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_Shr=
N16x8 );
-//.. goto decode_success;
-//.. }
+ /* 66 0F D3 =3D PSRLQ by E */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD3) {
+ delta =3D dis_SSE_shiftG_byE( pfx, delta+2, "psrlq", Iop_ShrN64x2 =
);
+ goto decode_success;
+ }
=20
+ /* 66 0F 71 /2 ib =3D PSRLW by immediate */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x71
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) =3D=3D 2) {
+ delta =3D dis_SSE_shiftE_imm( pfx, delta+2, "psrlw", Iop_ShrN16x8 =
);
+ goto decode_success;
+ }
+
+ /* 66 0F D1 =3D PSRLW by E */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD1) {
+ delta =3D dis_SSE_shiftG_byE( pfx, delta+2, "psrlw", Iop_ShrN16x8 =
);
+ goto decode_success;
+ }
+
/* 66 0F F8 =3D PSUBB */
if (have66noF2noF3(pfx) && sz =3D=3D 2=20
&& insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF8) {
Modified: trunk/priv/host-amd64/hdefs.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-amd64/hdefs.c 2005-05-10 22:42:54 UTC (rev 1178)
+++ trunk/priv/host-amd64/hdefs.c 2005-05-11 00:03:06 UTC (rev 1179)
@@ -619,29 +619,29 @@
case Asse_QSUB16U: return "psubusw";
case Asse_QSUB8S: return "psubsb";
case Asse_QSUB16S: return "psubsw";
-//.. case Xsse_MUL16: return "pmullw";
-//.. case Xsse_MULHI16U: return "pmulhuw";
-//.. case Xsse_MULHI16S: return "pmulhw";
+ case Asse_MUL16: return "pmullw";
+ case Asse_MULHI16U: return "pmulhuw";
+ case Asse_MULHI16S: return "pmulhw";
//.. case Xsse_AVG8U: return "pavgb";
//.. case Xsse_AVG16U: return "pavgw";
-//.. case Xsse_MAX16S: return "pmaxw";
-//.. case Xsse_MAX8U: return "pmaxub";
-//.. case Xsse_MIN16S: return "pminw";
-//.. case Xsse_MIN8U: return "pminub";
+ case Asse_MAX16S: return "pmaxw";
+ case Asse_MAX8U: return "pmaxub";
+ case Asse_MIN16S: return "pminw";
+ case Asse_MIN8U: return "pminub";
//.. case Xsse_CMPEQ8: return "pcmpeqb";
//.. case Xsse_CMPEQ16: return "pcmpeqw";
case Asse_CMPEQ32: return "pcmpeqd";
//.. case Xsse_CMPGT8S: return "pcmpgtb";
//.. case Xsse_CMPGT16S: return "pcmpgtw";
//.. case Xsse_CMPGT32S: return "pcmpgtd";
-//.. case Xsse_SHL16: return "psllw";
-//.. case Xsse_SHL32: return "pslld";
-//.. case Xsse_SHL64: return "psllq";
-//.. case Xsse_SHR16: return "psrlw";
-//.. case Xsse_SHR32: return "psrld";
+ case Asse_SHL16: return "psllw";
+ case Asse_SHL32: return "pslld";
+ case Asse_SHL64: return "psllq";
+ case Asse_SHR16: return "psrlw";
+ case Asse_SHR32: return "psrld";
case Asse_SHR64: return "psrlq";
-//.. case Xsse_SAR16: return "psraw";
-//.. case Xsse_SAR32: return "psrad";
+ case Asse_SAR16: return "psraw";
+ case Asse_SAR32: return "psrad";
case Asse_PACKSSD: return "packssdw";
case Asse_PACKSSW: return "packsswb";
case Asse_PACKUSW: return "packuswb";
@@ -3329,20 +3329,20 @@
//.. case Xsse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64);=
break;
//.. case Xsse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65);=
break;
//.. case Xsse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66);=
break;
-//.. case Xsse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE);=
break;
-//.. case Xsse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE);=
break;
-//.. case Xsse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA);=
break;
-//.. case Xsse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA);=
break;
-//.. case Xsse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4);=
break;
-//.. case Xsse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5);=
break;
-//.. case Xsse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5);=
break;
-//.. case Xsse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1);=
break;
-//.. case Xsse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2);=
break;
-//.. case Xsse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3);=
break;
-//.. case Xsse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1);=
break;
-//.. case Xsse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2);=
break;
-//.. case Xsse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1);=
break;
-//.. case Xsse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2);=
break;
+ case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); brea=
k;
+ case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); brea=
k;
+ case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); brea=
k;
+ case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); brea=
k;
+ case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); brea=
k;
+ case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); brea=
k;
+ case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); brea=
k;
+ case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); brea=
k;
+ case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); brea=
k;
+ case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); brea=
k;
+ case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); brea=
k;
+ case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); brea=
k;
+ case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); brea=
k;
+ case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); brea=
k;
case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); brea=
k;
case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); brea=
k;
case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); brea=
k;
Modified: trunk/priv/host-amd64/hdefs.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-amd64/hdefs.h 2005-05-10 22:42:54 UTC (rev 1178)
+++ trunk/priv/host-amd64/hdefs.h 2005-05-11 00:03:06 UTC (rev 1179)
@@ -329,21 +329,20 @@
Asse_SUB8, Asse_SUB16, Asse_SUB32, Asse_SUB64,
Asse_QSUB8U, Asse_QSUB16U,
Asse_QSUB8S, Asse_QSUB16S,
-//.. Xsse_MUL16,
-//.. Xsse_MULHI16U,
-//.. Xsse_MULHI16S,
+ Asse_MUL16,
+ Asse_MULHI16U,
+ Asse_MULHI16S,
//.. Xsse_AVG8U, Xsse_AVG16U,
-//.. Xsse_MAX16S,
-//.. Xsse_MAX8U,
-//.. Xsse_MIN16S,
-//.. Xsse_MIN8U,
+ Asse_MAX16S,
+ Asse_MAX8U,
+ Asse_MIN16S,
+ Asse_MIN8U,
//.. Xsse_CMPEQ8, Xsse_CMPEQ16, =20
Asse_CMPEQ32,
//.. Xsse_CMPGT8S, Xsse_CMPGT16S, Xsse_CMPGT32S,
-//.. Xsse_SHL16, Xsse_SHL32, Xsse_SHL64,
-//.. Xsse_SHR16, Xsse_SHR32,=20
- Asse_SHR64,
-//.. Xsse_SAR16, Xsse_SAR32,=20
+ Asse_SHL16, Asse_SHL32, Asse_SHL64,
+ Asse_SHR16, Asse_SHR32, Asse_SHR64,
+ Asse_SAR16, Asse_SAR32,=20
Asse_PACKSSD, Asse_PACKSSW, Asse_PACKUSW,
Asse_UNPCKHB, Asse_UNPCKHW, Asse_UNPCKHD, Asse_UNPCKHQ,
Asse_UNPCKLB, Asse_UNPCKLW, Asse_UNPCKLD, Asse_UNPCKLQ
Modified: trunk/priv/host-amd64/isel.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-amd64/isel.c 2005-05-10 22:42:54 UTC (rev 1178)
+++ trunk/priv/host-amd64/isel.c 2005-05-11 00:03:06 UTC (rev 1179)
@@ -3415,13 +3415,13 @@
//.. case Iop_CmpGT8Sx16: op =3D Xsse_CMPGT8S; goto do_SseReRg;
//.. case Iop_CmpGT16Sx8: op =3D Xsse_CMPGT16S; goto do_SseReRg;
//.. case Iop_CmpGT32Sx4: op =3D Xsse_CMPGT32S; goto do_SseReRg;
-//.. case Iop_Max16Sx8: op =3D Xsse_MAX16S; goto do_SseReRg;
-//.. case Iop_Max8Ux16: op =3D Xsse_MAX8U; goto do_SseReRg;
-//.. case Iop_Min16Sx8: op =3D Xsse_MIN16S; goto do_SseReRg;
-//.. case Iop_Min8Ux16: op =3D Xsse_MIN8U; goto do_SseReRg;
-//.. case Iop_MulHi16Ux8: op =3D Xsse_MULHI16U; goto do_SseReRg;
-//.. case Iop_MulHi16Sx8: op =3D Xsse_MULHI16S; goto do_SseReRg;
-//.. case Iop_Mul16x8: op =3D Xsse_MUL16; goto do_SseReRg;
+ case Iop_Max16Sx8: op =3D Asse_MAX16S; goto do_SseReRg;
+ case Iop_Max8Ux16: op =3D Asse_MAX8U; goto do_SseReRg;
+ case Iop_Min16Sx8: op =3D Asse_MIN16S; goto do_SseReRg;
+ case Iop_Min8Ux16: op =3D Asse_MIN8U; goto do_SseReRg;
+ case Iop_MulHi16Ux8: op =3D Asse_MULHI16U; goto do_SseReRg;
+ case Iop_MulHi16Sx8: op =3D Asse_MULHI16S; goto do_SseReRg;
+ case Iop_Mul16x8: op =3D Asse_MUL16; goto do_SseReRg;
case Iop_Sub8x16: op =3D Asse_SUB8; goto do_SseReRg;
case Iop_Sub16x8: op =3D Asse_SUB16; goto do_SseReRg;
case Iop_Sub32x4: op =3D Asse_SUB32; goto do_SseReRg;
@@ -3444,13 +3444,13 @@
return dst;
}
=20
-//.. case Iop_ShlN16x8: op =3D Xsse_SHL16; goto do_SseShift;
-//.. case Iop_ShlN32x4: op =3D Xsse_SHL32; goto do_SseShift;
-//.. case Iop_ShlN64x2: op =3D Xsse_SHL64; goto do_SseShift;
-//.. case Iop_SarN16x8: op =3D Xsse_SAR16; goto do_SseShift;
-//.. case Iop_SarN32x4: op =3D Xsse_SAR32; goto do_SseShift;
-//.. case Iop_ShrN16x8: op =3D Xsse_SHR16; goto do_SseShift;
-//.. case Iop_ShrN32x4: op =3D Xsse_SHR32; goto do_SseShift;
+ case Iop_ShlN16x8: op =3D Asse_SHL16; goto do_SseShift;
+ case Iop_ShlN32x4: op =3D Asse_SHL32; goto do_SseShift;
+ case Iop_ShlN64x2: op =3D Asse_SHL64; goto do_SseShift;
+ case Iop_SarN16x8: op =3D Asse_SAR16; goto do_SseShift;
+ case Iop_SarN32x4: op =3D Asse_SAR32; goto do_SseShift;
+ case Iop_ShrN16x8: op =3D Asse_SHR16; goto do_SseShift;
+ case Iop_ShrN32x4: op =3D Asse_SHR32; goto do_SseShift;
case Iop_ShrN64x2: op =3D Asse_SHR64; goto do_SseShift;
do_SseShift: {
HReg greg =3D iselVecExpr(env, e->Iex.Binop.arg1);
@@ -3472,17 +3472,17 @@
} /* switch (e->Iex.Binop.op) */
} /* if (e->tag =3D=3D Iex_Binop) */
=20
-//.. if (e->tag =3D=3D Iex_Mux0X) {
-//.. HReg r8 =3D iselIntExpr_R(env, e->Iex.Mux0X.cond);
-//.. HReg rX =3D iselVecExpr(env, e->Iex.Mux0X.exprX);
-//.. HReg r0 =3D iselVecExpr(env, e->Iex.Mux0X.expr0);
-//.. HReg dst =3D newVRegV(env);
-//.. addInstr(env, mk_vMOVsd_RR(rX,dst));
-//.. addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8))=
);
-//.. addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst));
-//.. return dst;
-//.. }
-//..=20
+ if (e->tag =3D=3D Iex_Mux0X) {
+ HReg r8 =3D iselIntExpr_R(env, e->Iex.Mux0X.cond);
+ HReg rX =3D iselVecExpr(env, e->Iex.Mux0X.exprX);
+ HReg r0 =3D iselVecExpr(env, e->Iex.Mux0X.expr0);
+ HReg dst =3D newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(rX,dst));
+ addInstr(env, AMD64Instr_Test64(AMD64RI_Imm(0xFF), AMD64RM_Reg(r8)=
));
+ addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
+ return dst;
+ }
+
vec_fail:
vex_printf("iselVecExpr (amd64, subarch =3D %s): can't reduce\n",
LibVEX_ppVexSubArch(env->subarch));
|